brw_state_upload.c revision 993c52d0be5bdf0e30e64ab4c6e1347c5dcb8e3b
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33
34#include "brw_context.h"
35#include "brw_state.h"
36#include "intel_batchbuffer.h"
37#include "intel_buffers.h"
38
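/* Per-generation lists of state atoms.  brw_init_state() picks one of
 * these lists and installs it directly as brw->atoms.
 *
 * NOTE(review): this comment used to say the list initializes
 * brw->state.atoms[] and could not be used directly because a single
 * atom, brw_constant_buffer, has a .dirty value which changes according
 * to the parameters of the current fragment and vertex programs, and so
 * cannot be a static value.  The code below uses the lists as-is;
 * confirm whether that caveat still applies.
 */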
45static const struct brw_tracked_state *gen4_atoms[] =
46{
47   &brw_wm_input_sizes,
48   &brw_vs_prog, /* must do before GS prog, state base address. */
49   &brw_gs_prog, /* must do before state base address */
50   &brw_clip_prog, /* must do before state base address */
51   &brw_sf_prog, /* must do before state base address */
52   &brw_wm_prog, /* must do before state base address */
53
54   /* Once all the programs are done, we know how large urb entry
55    * sizes need to be and can decide if we need to change the urb
56    * layout.
57    */
58   &brw_curbe_offsets,
59   &brw_recalculate_urb_fence,
60
61   &brw_cc_vp,
62   &brw_cc_unit,
63
64   /* Surface state setup.  Must come before the VS/WM unit.  The binding
65    * table upload must be last.
66    */
67   &brw_vs_pull_constants,
68   &brw_wm_pull_constants,
69   &brw_renderbuffer_surfaces,
70   &brw_texture_surfaces,
71   &brw_vs_binding_table,
72   &brw_wm_binding_table,
73
74   &brw_samplers,
75
76   /* These set up state for brw_psp_urb_cbs */
77   &brw_wm_unit,
78   &brw_sf_vp,
79   &brw_sf_unit,
80   &brw_vs_unit,		/* always required, enabled or not */
81   &brw_clip_unit,
82   &brw_gs_unit,
83
84   /* Command packets:
85    */
86   &brw_invariant_state,
87   &brw_state_base_address,
88
89   &brw_binding_table_pointers,
90   &brw_blend_constant_color,
91
92   &brw_depthbuffer,
93
94   &brw_polygon_stipple,
95   &brw_polygon_stipple_offset,
96
97   &brw_line_stipple,
98   &brw_aa_line_parameters,
99
100   &brw_psp_urb_cbs,
101
102   &brw_drawing_rect,
103   &brw_indices,
104   &brw_index_buffer,
105   &brw_vertices,
106
107   &brw_constant_buffer
108};
109
110static const struct brw_tracked_state *gen6_atoms[] =
111{
112   &brw_wm_input_sizes,
113   &brw_vs_prog, /* must do before state base address */
114   &brw_gs_prog, /* must do before state base address */
115   &brw_wm_prog, /* must do before state base address */
116
117   &gen6_clip_vp,
118   &gen6_sf_vp,
119
120   /* Command packets: */
121   &brw_invariant_state,
122
123   /* must do before binding table pointers, cc state ptrs */
124   &brw_state_base_address,
125
126   &brw_cc_vp,
127   &gen6_viewport_state,	/* must do after *_vp stages */
128
129   &gen6_urb,
130   &gen6_blend_state,		/* must do before cc unit */
131   &gen6_color_calc_state,	/* must do before cc unit */
132   &gen6_depth_stencil_state,	/* must do before cc unit */
133   &gen6_cc_state_pointers,
134
135   &gen6_vs_push_constants, /* Before vs_state */
136   &gen6_wm_push_constants, /* Before wm_state */
137
138   /* Surface state setup.  Must come before the VS/WM unit.  The binding
139    * table upload must be last.
140    */
141   &brw_vs_pull_constants,
142   &brw_vs_ubo_surfaces,
143   &brw_wm_pull_constants,
144   &brw_wm_ubo_surfaces,
145   &gen6_renderbuffer_surfaces,
146   &brw_texture_surfaces,
147   &gen6_sol_surface,
148   &brw_vs_binding_table,
149   &gen6_gs_binding_table,
150   &brw_wm_binding_table,
151
152   &brw_samplers,
153   &gen6_sampler_state,
154   &gen6_multisample_state,
155
156   &gen6_vs_state,
157   &gen6_gs_state,
158   &gen6_clip_state,
159   &gen6_sf_state,
160   &gen6_wm_state,
161
162   &gen6_scissor_state,
163
164   &gen6_binding_table_pointers,
165
166   &brw_depthbuffer,
167
168   &brw_polygon_stipple,
169   &brw_polygon_stipple_offset,
170
171   &brw_line_stipple,
172   &brw_aa_line_parameters,
173
174   &brw_drawing_rect,
175
176   &gen6_sol_indices,
177   &brw_indices,
178   &brw_index_buffer,
179   &brw_vertices,
180};
181
182const struct brw_tracked_state *gen7_atoms[] =
183{
184   &brw_wm_input_sizes,
185   &brw_vs_prog,
186   &brw_wm_prog,
187
188   /* Command packets: */
189   &brw_invariant_state,
190   &gen7_push_constant_alloc,
191
192   /* must do before binding table pointers, cc state ptrs */
193   &brw_state_base_address,
194
195   &brw_cc_vp,
196   &gen7_cc_viewport_state_pointer, /* must do after brw_cc_vp */
197   &gen7_sf_clip_viewport,
198
199   &gen7_urb,
200   &gen6_blend_state,		/* must do before cc unit */
201   &gen6_color_calc_state,	/* must do before cc unit */
202   &gen6_depth_stencil_state,	/* must do before cc unit */
203   &gen7_blend_state_pointer,
204   &gen7_cc_state_pointer,
205   &gen7_depth_stencil_state_pointer,
206
207   &gen6_vs_push_constants, /* Before vs_state */
208   &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
209
210   /* Surface state setup.  Must come before the VS/WM unit.  The binding
211    * table upload must be last.
212    */
213   &brw_vs_pull_constants,
214   &brw_vs_ubo_surfaces,
215   &brw_wm_pull_constants,
216   &brw_wm_ubo_surfaces,
217   &gen6_renderbuffer_surfaces,
218   &brw_texture_surfaces,
219   &brw_vs_binding_table,
220   &brw_wm_binding_table,
221
222   &gen7_samplers,
223   &gen6_multisample_state,
224
225   &gen7_disable_stages,
226   &gen7_vs_state,
227   &gen7_sol_state,
228   &gen7_clip_state,
229   &gen7_sbe_state,
230   &gen7_sf_state,
231   &gen7_wm_state,
232   &gen7_ps_state,
233
234   &gen6_scissor_state,
235
236   &gen7_depthbuffer,
237
238   &brw_polygon_stipple,
239   &brw_polygon_stipple_offset,
240
241   &brw_line_stipple,
242   &brw_aa_line_parameters,
243
244   &brw_drawing_rect,
245
246   &brw_indices,
247   &brw_index_buffer,
248   &brw_vertices,
249};
250
251
252void brw_init_state( struct brw_context *brw )
253{
254   const struct brw_tracked_state **atoms;
255   int num_atoms;
256
257   brw_init_caches(brw);
258
259   if (brw->intel.gen >= 7) {
260      atoms = gen7_atoms;
261      num_atoms = ARRAY_SIZE(gen7_atoms);
262   } else if (brw->intel.gen == 6) {
263      atoms = gen6_atoms;
264      num_atoms = ARRAY_SIZE(gen6_atoms);
265   } else {
266      atoms = gen4_atoms;
267      num_atoms = ARRAY_SIZE(gen4_atoms);
268   }
269
270   brw->atoms = atoms;
271   brw->num_atoms = num_atoms;
272
273   while (num_atoms--) {
274      assert((*atoms)->dirty.mesa |
275	     (*atoms)->dirty.brw |
276	     (*atoms)->dirty.cache);
277      assert((*atoms)->emit);
278      atoms++;
279   }
280}
281
282
/**
 * Tear down state tracking for a context.
 *
 * The only work done here is handing the context to
 * brw_destroy_caches().
 */
void
brw_destroy_state(struct brw_context *brw)
{
   brw_destroy_caches(brw);
}
287
288/***********************************************************************
289 */
290
291static GLuint check_state( const struct brw_state_flags *a,
292			   const struct brw_state_flags *b )
293{
294   return ((a->mesa & b->mesa) |
295	   (a->brw & b->brw) |
296	   (a->cache & b->cache)) != 0;
297}
298
299static void accumulate_state( struct brw_state_flags *a,
300			      const struct brw_state_flags *b )
301{
302   a->mesa |= b->mesa;
303   a->brw |= b->brw;
304   a->cache |= b->cache;
305}
306
307
308static void xor_states( struct brw_state_flags *result,
309			     const struct brw_state_flags *a,
310			      const struct brw_state_flags *b )
311{
312   result->mesa = a->mesa ^ b->mesa;
313   result->brw = a->brw ^ b->brw;
314   result->cache = a->cache ^ b->cache;
315}
316
/* One row of a dirty-flag statistics table.
 *
 *   bit   - the flag value this row tracks; a row with bit == 0 marks
 *           the end of a table.
 *   name  - printable name of the flag.  Rows built with DEFINE_BIT()
 *           point at a string literal, hence the const qualifier.
 *   count - running tally, incremented by brw_update_dirty_count().
 */
struct dirty_bit_map {
   uint32_t bit;
   const char *name;
   uint32_t count;
};

/* Build a table row from a flag constant: the flag's value, its
 * spelling as a string, and a zeroed counter.
 */
#define DEFINE_BIT(name) {name, #name, 0}
324
325static struct dirty_bit_map mesa_bits[] = {
326   DEFINE_BIT(_NEW_MODELVIEW),
327   DEFINE_BIT(_NEW_PROJECTION),
328   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
329   DEFINE_BIT(_NEW_COLOR),
330   DEFINE_BIT(_NEW_DEPTH),
331   DEFINE_BIT(_NEW_EVAL),
332   DEFINE_BIT(_NEW_FOG),
333   DEFINE_BIT(_NEW_HINT),
334   DEFINE_BIT(_NEW_LIGHT),
335   DEFINE_BIT(_NEW_LINE),
336   DEFINE_BIT(_NEW_PIXEL),
337   DEFINE_BIT(_NEW_POINT),
338   DEFINE_BIT(_NEW_POLYGON),
339   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
340   DEFINE_BIT(_NEW_SCISSOR),
341   DEFINE_BIT(_NEW_STENCIL),
342   DEFINE_BIT(_NEW_TEXTURE),
343   DEFINE_BIT(_NEW_TRANSFORM),
344   DEFINE_BIT(_NEW_VIEWPORT),
345   DEFINE_BIT(_NEW_PACKUNPACK),
346   DEFINE_BIT(_NEW_ARRAY),
347   DEFINE_BIT(_NEW_RENDERMODE),
348   DEFINE_BIT(_NEW_BUFFERS),
349   DEFINE_BIT(_NEW_MULTISAMPLE),
350   DEFINE_BIT(_NEW_TRACK_MATRIX),
351   DEFINE_BIT(_NEW_PROGRAM),
352   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
353   {0, 0, 0}
354};
355
356static struct dirty_bit_map brw_bits[] = {
357   DEFINE_BIT(BRW_NEW_URB_FENCE),
358   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
359   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
360   DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS),
361   DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
362   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
363   DEFINE_BIT(BRW_NEW_PRIMITIVE),
364   DEFINE_BIT(BRW_NEW_CONTEXT),
365   DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
366   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
367   DEFINE_BIT(BRW_NEW_PSP),
368   DEFINE_BIT(BRW_NEW_SURFACES),
369   DEFINE_BIT(BRW_NEW_INDICES),
370   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
371   DEFINE_BIT(BRW_NEW_VERTICES),
372   DEFINE_BIT(BRW_NEW_BATCH),
373   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
374   DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
375   DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
376   DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
377   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
378   {0, 0, 0}
379};
380
381static struct dirty_bit_map cache_bits[] = {
382   DEFINE_BIT(CACHE_NEW_BLEND_STATE),
383   DEFINE_BIT(CACHE_NEW_CC_VP),
384   DEFINE_BIT(CACHE_NEW_CC_UNIT),
385   DEFINE_BIT(CACHE_NEW_WM_PROG),
386   DEFINE_BIT(CACHE_NEW_SAMPLER),
387   DEFINE_BIT(CACHE_NEW_WM_UNIT),
388   DEFINE_BIT(CACHE_NEW_SF_PROG),
389   DEFINE_BIT(CACHE_NEW_SF_VP),
390   DEFINE_BIT(CACHE_NEW_SF_UNIT),
391   DEFINE_BIT(CACHE_NEW_VS_UNIT),
392   DEFINE_BIT(CACHE_NEW_VS_PROG),
393   DEFINE_BIT(CACHE_NEW_GS_UNIT),
394   DEFINE_BIT(CACHE_NEW_GS_PROG),
395   DEFINE_BIT(CACHE_NEW_CLIP_VP),
396   DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
397   DEFINE_BIT(CACHE_NEW_CLIP_PROG),
398   {0, 0, 0}
399};
400
401
402static void
403brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
404{
405   int i;
406
407   for (i = 0; i < 32; i++) {
408      if (bit_map[i].bit == 0)
409	 return;
410
411      if (bit_map[i].bit & bits)
412	 bit_map[i].count++;
413   }
414}
415
416static void
417brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
418{
419   int i;
420
421   for (i = 0; i < 32; i++) {
422      if (bit_map[i].bit == 0)
423	 return;
424
425      fprintf(stderr, "0x%08x: %12d (%s)\n",
426	      bit_map[i].bit, bit_map[i].count, bit_map[i].name);
427   }
428}
429
430/***********************************************************************
431 * Emit all state:
432 */
433void brw_upload_state(struct brw_context *brw)
434{
435   struct gl_context *ctx = &brw->intel.ctx;
436   struct intel_context *intel = &brw->intel;
437   struct brw_state_flags *state = &brw->state.dirty;
438   int i;
439   static int dirty_count = 0;
440
441   state->mesa |= brw->intel.NewGLState;
442   brw->intel.NewGLState = 0;
443
444   if (brw->emit_state_always) {
445      state->mesa |= ~0;
446      state->brw |= ~0;
447      state->cache |= ~0;
448   }
449
450   if (brw->fragment_program != ctx->FragmentProgram._Current) {
451      brw->fragment_program = ctx->FragmentProgram._Current;
452      brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
453   }
454
455   if (brw->vertex_program != ctx->VertexProgram._Current) {
456      brw->vertex_program = ctx->VertexProgram._Current;
457      brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
458   }
459
460   if ((state->mesa | state->cache | state->brw) == 0)
461      return;
462
463   intel_check_front_buffer_rendering(intel);
464
465   if (unlikely(INTEL_DEBUG)) {
466      /* Debug version which enforces various sanity checks on the
467       * state flags which are generated and checked to help ensure
468       * state atoms are ordered correctly in the list.
469       */
470      struct brw_state_flags examined, prev;
471      memset(&examined, 0, sizeof(examined));
472      prev = *state;
473
474      for (i = 0; i < brw->num_atoms; i++) {
475	 const struct brw_tracked_state *atom = brw->atoms[i];
476	 struct brw_state_flags generated;
477
478	 if (check_state(state, &atom->dirty)) {
479	    atom->emit(brw);
480	 }
481
482	 accumulate_state(&examined, &atom->dirty);
483
484	 /* generated = (prev ^ state)
485	  * if (examined & generated)
486	  *     fail;
487	  */
488	 xor_states(&generated, &prev, state);
489	 assert(!check_state(&examined, &generated));
490	 prev = *state;
491      }
492   }
493   else {
494      for (i = 0; i < brw->num_atoms; i++) {
495	 const struct brw_tracked_state *atom = brw->atoms[i];
496
497	 if (check_state(state, &atom->dirty)) {
498	    atom->emit(brw);
499	 }
500      }
501   }
502
503   if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
504      brw_update_dirty_count(mesa_bits, state->mesa);
505      brw_update_dirty_count(brw_bits, state->brw);
506      brw_update_dirty_count(cache_bits, state->cache);
507      if (dirty_count++ % 1000 == 0) {
508	 brw_print_dirty_count(mesa_bits, state->mesa);
509	 brw_print_dirty_count(brw_bits, state->brw);
510	 brw_print_dirty_count(cache_bits, state->cache);
511	 fprintf(stderr, "\n");
512      }
513   }
514
515   memset(state, 0, sizeof(*state));
516}
517