nv50_shader_state.c revision 02fac2930581b9bea9f6d221eb6d6b471fc3b9c6
1/*
2 * Copyright 2008 Ben Skeggs
3 * Copyright 2010 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
20 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "pipe/p_context.h"
25#include "pipe/p_defines.h"
26#include "pipe/p_state.h"
27#include "util/u_inlines.h"
28
29#include "nv50_context.h"
30
31void
32nv50_constbufs_validate(struct nv50_context *nv50)
33{
34   struct nouveau_pushbuf *push = nv50->base.pushbuf;
35   unsigned s;
36
37   for (s = 0; s < 3; ++s) {
38      struct nv04_resource *res;
39      int i;
40      unsigned p, b;
41
42      if (s == PIPE_SHADER_FRAGMENT)
43         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT;
44      else
45      if (s == PIPE_SHADER_GEOMETRY)
46         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY;
47      else
48         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX;
49
50      while (nv50->constbuf_dirty[s]) {
51         struct nouveau_bo *bo;
52         unsigned start = 0;
53         unsigned words = 0;
54
55         i = ffs(nv50->constbuf_dirty[s]) - 1;
56         nv50->constbuf_dirty[s] &= ~(1 << i);
57
58         res = nv04_resource(nv50->constbuf[s][i]);
59         if (!res) {
60            if (i != 0) {
61               BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
62               PUSH_DATA (push, (i << 8) | p | 0);
63            }
64            continue;
65         }
66
67         if (i == 0) {
68            b = NV50_CB_PVP + s;
69
70            /* always upload GL uniforms through CB DATA */
71            bo = nv50->screen->uniforms;
72            words = res->base.width0 / 4;
73         } else {
74            b = s * 16 + i;
75
76            assert(0);
77
78            if (!nouveau_resource_mapped_by_gpu(&res->base)) {
79               nouveau_buffer_migrate(&nv50->base, res, NOUVEAU_BO_VRAM);
80
81               BEGIN_NV04(push, NV50_3D(CODE_CB_FLUSH), 1);
82               PUSH_DATA (push, 0);
83            }
84            BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
85            PUSH_DATAh(push, res->address);
86            PUSH_DATA (push, res->address);
87            PUSH_DATA (push, (b << 16) | (res->base.width0 & 0xffff));
88            BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
89            PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
90
91            bo = res->bo;
92         }
93
94         if (bo != nv50->screen->uniforms)
95            BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD);
96
97         while (words) {
98            unsigned nr;
99
100            if (!PUSH_SPACE(push, 16))
101               break;
102            nr = PUSH_AVAIL(push);
103            assert(nr >= 16);
104            nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN);
105
106            BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
107            PUSH_DATA (push, (start << 8) | b);
108            BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
109            PUSH_DATAp(push, &res->data[start * 4], nr);
110
111            start += nr;
112            words -= nr;
113         }
114      }
115   }
116}
117
118static boolean
119nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
120{
121   if (!prog->translated) {
122      prog->translated = nv50_program_translate(
123         prog, nv50->screen->base.device->chipset);
124      if (!prog->translated)
125         return FALSE;
126   } else
127   if (prog->mem)
128      return TRUE;
129
130   return nv50_program_upload_code(nv50, prog);
131}
132
133static INLINE void
134nv50_program_update_context_state(struct nv50_context *nv50,
135                                  struct nv50_program *prog, int stage)
136{
137   const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
138
139   if (prog && prog->uses_lmem) {
140      if (!nv50->state.tls_required)
141         BCTX_REFN_bo(nv50->bufctx_3d, TLS, flags, nv50->screen->tls_bo);
142      nv50->state.tls_required |= 1 << stage;
143   } else {
144      if (nv50->state.tls_required == (1 << stage))
145         nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
146      nv50->state.tls_required &= ~(1 << stage);
147   }
148}
149
150void
151nv50_vertprog_validate(struct nv50_context *nv50)
152{
153   struct nouveau_pushbuf *push = nv50->base.pushbuf;
154   struct nv50_program *vp = nv50->vertprog;
155
156   if (!nv50_program_validate(nv50, vp))
157         return;
158   nv50_program_update_context_state(nv50, vp, 0);
159
160   BEGIN_NV04(push, NV50_3D(VP_ATTR_EN(0)), 2);
161   PUSH_DATA (push, vp->vp.attrs[0]);
162   PUSH_DATA (push, vp->vp.attrs[1]);
163   BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_RESULT), 1);
164   PUSH_DATA (push, vp->max_out);
165   BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_TEMP), 1);
166   PUSH_DATA (push, vp->max_gpr);
167   BEGIN_NV04(push, NV50_3D(VP_START_ID), 1);
168   PUSH_DATA (push, vp->code_base);
169}
170
171void
172nv50_fragprog_validate(struct nv50_context *nv50)
173{
174   struct nouveau_pushbuf *push = nv50->base.pushbuf;
175   struct nv50_program *fp = nv50->fragprog;
176
177   if (!nv50_program_validate(nv50, fp))
178         return;
179   nv50_program_update_context_state(nv50, fp, 1);
180
181   BEGIN_NV04(push, NV50_3D(FP_REG_ALLOC_TEMP), 1);
182   PUSH_DATA (push, fp->max_gpr);
183   BEGIN_NV04(push, NV50_3D(FP_RESULT_COUNT), 1);
184   PUSH_DATA (push, fp->max_out);
185   BEGIN_NV04(push, NV50_3D(FP_CONTROL), 1);
186   PUSH_DATA (push, fp->fp.flags[0]);
187   BEGIN_NV04(push, NV50_3D(FP_CTRL_UNK196C), 1);
188   PUSH_DATA (push, fp->fp.flags[1]);
189   BEGIN_NV04(push, NV50_3D(FP_START_ID), 1);
190   PUSH_DATA (push, fp->code_base);
191}
192
193void
194nv50_gmtyprog_validate(struct nv50_context *nv50)
195{
196   struct nouveau_pushbuf *push = nv50->base.pushbuf;
197   struct nv50_program *gp = nv50->gmtyprog;
198
199   if (gp) {
200      BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_TEMP), 1);
201      PUSH_DATA (push, gp->max_gpr);
202      BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_RESULT), 1);
203      PUSH_DATA (push, gp->max_out);
204      BEGIN_NV04(push, NV50_3D(GP_OUTPUT_PRIMITIVE_TYPE), 1);
205      PUSH_DATA (push, gp->gp.prim_type);
206      BEGIN_NV04(push, NV50_3D(GP_VERTEX_OUTPUT_COUNT), 1);
207      PUSH_DATA (push, gp->gp.vert_count);
208      BEGIN_NV04(push, NV50_3D(GP_START_ID), 1);
209      PUSH_DATA (push, gp->code_base);
210
211      nv50->state.prim_size = gp->gp.prim_type; /* enum matches vertex count */
212   }
213   nv50_program_update_context_state(nv50, gp, 2);
214
215   /* GP_ENABLE is updated in linkage validation */
216}
217
218static void
219nv50_sprite_coords_validate(struct nv50_context *nv50)
220{
221   struct nouveau_pushbuf *push = nv50->base.pushbuf;
222   uint32_t pntc[8], mode;
223   struct nv50_program *fp = nv50->fragprog;
224   unsigned i, c;
225   unsigned m = (nv50->state.interpolant_ctrl >> 8) & 0xff;
226
227   if (!nv50->rast->pipe.point_quad_rasterization) {
228      if (nv50->state.point_sprite) {
229         BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);
230         for (i = 0; i < 8; ++i)
231            PUSH_DATA(push, 0);
232
233         nv50->state.point_sprite = FALSE;
234      }
235      return;
236   } else {
237      nv50->state.point_sprite = TRUE;
238   }
239
240   memset(pntc, 0, sizeof(pntc));
241
242   for (i = 0; i < fp->in_nr; i++) {
243      unsigned n = util_bitcount(fp->in[i].mask);
244
245      if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) {
246         m += n;
247         continue;
248      }
249      if (!(nv50->rast->pipe.sprite_coord_enable & (1 << fp->in[i].si))) {
250         m += n;
251         continue;
252      }
253
254      for (c = 0; c < 4; ++c) {
255         if (fp->in[i].mask & (1 << c)) {
256            pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
257            ++m;
258         }
259      }
260   }
261
262   if (nv50->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
263      mode = 0x00;
264   else
265      mode = 0x10;
266
267   BEGIN_NV04(push, NV50_3D(POINT_SPRITE_CTRL), 1);
268   PUSH_DATA (push, mode);
269
270   BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);
271   PUSH_DATAp(push, pntc, 8);
272}
273
274/* Validate state derived from shaders and the rasterizer cso. */
275void
276nv50_validate_derived_rs(struct nv50_context *nv50)
277{
278   struct nouveau_pushbuf *push = nv50->base.pushbuf;
279   uint32_t color, psize;
280
281   nv50_sprite_coords_validate(nv50);
282
283   if (nv50->state.rasterizer_discard != nv50->rast->pipe.rasterizer_discard) {
284      nv50->state.rasterizer_discard = nv50->rast->pipe.rasterizer_discard;
285      BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
286      PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard);
287   }
288
289   if (nv50->dirty & NV50_NEW_FRAGPROG)
290      return;
291   psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
292   color = nv50->state.semantic_color & ~NV50_3D_SEMANTIC_COLOR_CLMP_EN;
293
294   if (nv50->rast->pipe.clamp_vertex_color)
295      color |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;
296
297   if (color != nv50->state.semantic_color) {
298      nv50->state.semantic_color = color;
299      BEGIN_NV04(push, NV50_3D(SEMANTIC_COLOR), 1);
300      PUSH_DATA (push, color);
301   }
302
303   if (nv50->rast->pipe.point_size_per_vertex)
304      psize |= NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
305
306   if (psize != nv50->state.semantic_psize) {
307      nv50->state.semantic_psize = psize;
308      BEGIN_NV04(push, NV50_3D(SEMANTIC_PTSZ), 1);
309      PUSH_DATA (push, psize);
310   }
311}
312
313static int
314nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4],
315              struct nv50_varying *in, struct nv50_varying *out)
316{
317   int c;
318   uint8_t mv = out->mask, mf = in->mask, oid = out->hw;
319
320   for (c = 0; c < 4; ++c) {
321      if (mf & 1) {
322         if (in->linear)
323            lin[mid / 32] |= 1 << (mid % 32);
324         if (mv & 1)
325            map[mid] = oid;
326         else
327         if (c == 3)
328            map[mid] |= 1;
329         ++mid;
330      }
331
332      oid += mv & 1;
333      mf >>= 1;
334      mv >>= 1;
335   }
336
337   return mid;
338}
339
340void
341nv50_fp_linkage_validate(struct nv50_context *nv50)
342{
343   struct nouveau_pushbuf *push = nv50->base.pushbuf;
344   struct nv50_program *vp = nv50->gmtyprog ? nv50->gmtyprog : nv50->vertprog;
345   struct nv50_program *fp = nv50->fragprog;
346   struct nv50_varying dummy;
347   int i, n, c, m;
348   uint32_t primid = 0;
349   uint32_t psiz = 0x000;
350   uint32_t interp = fp->fp.interp;
351   uint32_t colors = fp->fp.colors;
352   uint32_t lin[4];
353   uint8_t map[64];
354   uint8_t so_map[64];
355
356   if (!(nv50->dirty & (NV50_NEW_VERTPROG |
357                        NV50_NEW_FRAGPROG |
358                        NV50_NEW_GMTYPROG))) {
359      uint8_t bfc, ffc;
360      ffc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK);
361      bfc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK)
362         >> 8;
363      if (nv50->rast->pipe.light_twoside == ((ffc == bfc) ? 0 : 1))
364         return;
365   }
366
367   memset(lin, 0x00, sizeof(lin));
368
369   /* XXX: in buggy-endian mode, is the first element of map (u32)0x000000xx
370    *  or is it the first byte ?
371    */
372   memset(map, nv50->gmtyprog ? 0x80 : 0x40, sizeof(map));
373
374   dummy.mask = 0xf; /* map all components of HPOS */
375   dummy.linear = 0;
376   m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);
377
378   for (c = 0; c < vp->vp.clpd_nr; ++c)
379      map[m++] = vp->vp.clpd[c / 4] + (c % 4);
380
381   colors |= m << 8; /* adjust BFC0 id */
382
383   dummy.mask = 0x0;
384
385   /* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */
386   if (nv50->rast->pipe.light_twoside) {
387      for (i = 0; i < 2; ++i) {
388         n = vp->vp.bfc[i];
389         if (fp->vp.bfc[i] >= fp->in_nr)
390            continue;
391         m = nv50_vec4_map(map, m, lin, &fp->in[fp->vp.bfc[i]],
392                           (n < vp->out_nr) ? &vp->out[n] : &dummy);
393      }
394   }
395   colors += m - 4; /* adjust FFC0 id */
396   interp |= m << 8; /* set map id where 'normal' FP inputs start */
397
398   for (i = 0; i < fp->in_nr; ++i) {
399      for (n = 0; n < vp->out_nr; ++n)
400         if (vp->out[n].sn == fp->in[i].sn &&
401             vp->out[n].si == fp->in[i].si)
402            break;
403      m = nv50_vec4_map(map, m, lin,
404                        &fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);
405   }
406
407   /* PrimitiveID either is replaced by the system value, or
408    * written by the geometry shader into an output register
409    */
410   if (fp->gp.primid < 0x80) {
411      primid = m;
412      map[m++] = vp->gp.primid;
413   }
414
415   if (nv50->rast->pipe.point_size_per_vertex) {
416      psiz = (m << 4) | 1;
417      map[m++] = vp->vp.psiz;
418   }
419
420   if (nv50->rast->pipe.clamp_vertex_color)
421      colors |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;
422
423   if (unlikely(vp->so)) {
424      /* Slot i in STRMOUT_MAP specifies the offset where slot i in RESULT_MAP
425       * gets written.
426       *
427       * TODO:
428       * Inverting vp->so->map (output -> offset) would probably speed this up.
429       */
430      memset(so_map, 0, sizeof(so_map));
431      for (i = 0; i < vp->so->map_size; ++i) {
432         if (vp->so->map[i] == 0xff)
433            continue;
434         for (c = 0; c < m; ++c)
435            if (map[c] == vp->so->map[i] && !so_map[c])
436               break;
437         if (c == m) {
438            c = m;
439            map[m++] = vp->so->map[i];
440         }
441         so_map[c] = 0x80 | i;
442      }
443      for (c = m; c & 3; ++c)
444         so_map[c] = 0;
445   }
446
447   n = (m + 3) / 4;
448   assert(m <= 64);
449
450   if (unlikely(nv50->gmtyprog)) {
451      BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP_SIZE), 1);
452      PUSH_DATA (push, m);
453      BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP(0)), n);
454      PUSH_DATAp(push, map, n);
455   } else {
456      BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1);
457      PUSH_DATA (push, vp->vp.attrs[2]);
458
459      BEGIN_NV04(push, NV50_3D(SEMANTIC_PRIM_ID), 1);
460      PUSH_DATA (push, primid);
461
462      BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1);
463      PUSH_DATA (push, m);
464      BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
465      PUSH_DATAp(push, map, n);
466   }
467
468   BEGIN_NV04(push, NV50_3D(SEMANTIC_COLOR), 4);
469   PUSH_DATA (push, colors);
470   PUSH_DATA (push, (vp->vp.clpd_nr << 8) | 4);
471   PUSH_DATA (push, 0);
472   PUSH_DATA (push, psiz);
473
474   BEGIN_NV04(push, NV50_3D(FP_INTERPOLANT_CTRL), 1);
475   PUSH_DATA (push, interp);
476
477   nv50->state.interpolant_ctrl = interp;
478
479   nv50->state.semantic_color = colors;
480   nv50->state.semantic_psize = psiz;
481
482   BEGIN_NV04(push, NV50_3D(NOPERSPECTIVE_BITMAP(0)), 4);
483   PUSH_DATAp(push, lin, 4);
484
485   BEGIN_NV04(push, NV50_3D(GP_ENABLE), 1);
486   PUSH_DATA (push, nv50->gmtyprog ? 1 : 0);
487
488   if (vp->so) {
489      BEGIN_NV04(push, NV50_3D(STRMOUT_MAP(0)), n);
490      PUSH_DATAp(push, so_map, n);
491   }
492}
493
494static int
495nv50_vp_gp_mapping(uint8_t *map, int m,
496                   struct nv50_program *vp, struct nv50_program *gp)
497{
498   int i, j, c;
499
500   for (i = 0; i < gp->in_nr; ++i) {
501      uint8_t oid = 0, mv = 0, mg = gp->in[i].mask;
502
503      for (j = 0; j < vp->out_nr; ++j) {
504         if (vp->out[j].sn == gp->in[i].sn &&
505             vp->out[j].si == gp->in[i].si) {
506            mv = vp->out[j].mask;
507            oid = vp->out[j].hw;
508            break;
509         }
510      }
511
512      for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {
513         if (mg & mv & 1)
514            map[m++] = oid;
515         else
516         if (mg & 1)
517            map[m++] = (c == 3) ? 0x41 : 0x40;
518         oid += mv & 1;
519      }
520   }
521   return m;
522}
523
524void
525nv50_gp_linkage_validate(struct nv50_context *nv50)
526{
527   struct nouveau_pushbuf *push = nv50->base.pushbuf;
528   struct nv50_program *vp = nv50->vertprog;
529   struct nv50_program *gp = nv50->gmtyprog;
530   int m = 0;
531   int n;
532   uint8_t map[64];
533
534   if (!gp)
535      return;
536   memset(map, 0, sizeof(map));
537
538   m = nv50_vp_gp_mapping(map, m, vp, gp);
539
540   n = (m + 3) / 4;
541
542   BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1);
543   PUSH_DATA (push, vp->vp.attrs[2] | gp->vp.attrs[2]);
544
545   BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1);
546   PUSH_DATA (push, m);
547   BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
548   PUSH_DATAp(push, map, n);
549}
550
551void
552nv50_stream_output_validate(struct nv50_context *nv50)
553{
554   struct nouveau_pushbuf *push = nv50->base.pushbuf;
555   struct nv50_stream_output_state *so;
556   uint32_t ctrl;
557   unsigned i;
558   unsigned prims = ~0;
559
560   so = nv50->gmtyprog ? nv50->gmtyprog->so : nv50->vertprog->so;
561
562   if (!so || !nv50->num_so_targets) {
563      BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
564      PUSH_DATA (push, 0);
565      if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
566         BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
567         PUSH_DATA (push, 0);
568      }
569      BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
570      PUSH_DATA (push, 1);
571      return;
572   }
573
574   ctrl = so->ctrl;
575   if (nv50->screen->base.class_3d >= NVA0_3D_CLASS)
576      ctrl |= NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET;
577
578   BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1);
579   PUSH_DATA (push, ctrl);
580
581   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO);
582
583   for (i = 0; i < nv50->num_so_targets; ++i) {
584      struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]);
585      struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);
586
587      const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3;
588
589      if (n == 4 && !targ->clean)
590         nv84_query_fifo_wait(push, targ->pq);
591      BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);
592      PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
593      PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
594      PUSH_DATA (push, so->num_attribs[i]);
595      if (n == 4) {
596         PUSH_DATA(push, targ->pipe.buffer_size);
597
598         BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
599         if (!targ->clean) {
600            assert(targ->pq);
601            nv50_query_pushbuf_submit(push, targ->pq, 0x4);
602         } else {
603            PUSH_DATA(push, 0);
604            targ->clean = FALSE;
605         }
606      } else {
607         const unsigned limit = targ->pipe.buffer_size /
608            (so->stride[i] * nv50->state.prim_size);
609         prims = MIN2(prims, limit);
610      }
611      BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
612   }
613   if (prims != ~0) {
614      BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
615      PUSH_DATA (push, prims);
616   }
617   BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
618   PUSH_DATA (push, 1);
619   BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
620   PUSH_DATA (push, 1);
621}
622