brw_wm_state.c revision f30de6964018619658439216cd8bf9371ee6256d
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33
34#include "brw_context.h"
35#include "brw_state.h"
36#include "brw_defines.h"
37#include "brw_wm.h"
38
39/***********************************************************************
40 * WM unit - fragment programs and rasterization
41 */
42
43struct brw_wm_unit_key {
44   unsigned int total_grf, total_scratch;
45   unsigned int urb_entry_read_length;
46   unsigned int curb_entry_read_length;
47   unsigned int dispatch_grf_start_reg;
48
49   unsigned int curbe_offset;
50   unsigned int urb_size;
51
52   unsigned int nr_surfaces, sampler_count;
53   GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
54   GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
55   GLboolean color_write_enable;
56   GLfloat offset_units, offset_factor;
57};
58
59bool
60brw_color_buffer_write_enabled(struct brw_context *brw)
61{
62   struct gl_context *ctx = &brw->intel.ctx;
63   const struct gl_fragment_program *fp = brw->fragment_program;
64   int i;
65
66   /* _NEW_BUFFERS */
67   for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
68      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
69
70      /* _NEW_COLOR */
71      if (rb &&
72	  (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
73	   fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&
74	  (ctx->Color.ColorMask[i][0] ||
75	   ctx->Color.ColorMask[i][1] ||
76	   ctx->Color.ColorMask[i][2] ||
77	   ctx->Color.ColorMask[i][3])) {
78	 return true;
79      }
80   }
81
82   return false;
83}
84
85static void
86wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
87{
88   struct gl_context *ctx = &brw->intel.ctx;
89   const struct gl_fragment_program *fp = brw->fragment_program;
90   const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
91   struct intel_context *intel = &brw->intel;
92
93   memset(key, 0, sizeof(*key));
94
95   /* CACHE_NEW_WM_PROG */
96   key->total_grf = brw->wm.prog_data->total_grf;
97   key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
98   key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
99   key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
100   key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024);
101
102   /* BRW_NEW_URB_FENCE */
103   key->urb_size = brw->urb.vsize;
104
105   /* BRW_NEW_CURBE_OFFSETS */
106   key->curbe_offset = brw->curbe.wm_start;
107
108   /* BRW_NEW_NR_SURFACEs */
109   key->nr_surfaces = brw->wm.nr_surfaces;
110
111   /* CACHE_NEW_SAMPLER */
112   key->sampler_count = brw->wm.sampler_count;
113
114   /* _NEW_POLYGONSTIPPLE */
115   key->polygon_stipple = ctx->Polygon.StippleFlag;
116
117   /* BRW_NEW_FRAGMENT_PROGRAM */
118   key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
119
120   /* as far as we can tell */
121   key->computes_depth =
122      (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0;
123   /* BRW_NEW_DEPTH_BUFFER
124    * Override for NULL depthbuffer case, required by the Pixel Shader Computed
125    * Depth field.
126    */
127   if (brw->state.depth_region == NULL)
128      key->computes_depth = 0;
129
130   /* _NEW_BUFFERS | _NEW_COLOR */
131   key->color_write_enable = brw_color_buffer_write_enabled(brw);
132
133   /* _NEW_COLOR */
134   key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
135   key->is_glsl = bfp->isGLSL;
136
137   /* If using the fragment shader backend, the program is always
138    * 8-wide.
139    */
140   if (ctx->Shader.CurrentProgram) {
141      struct brw_shader *shader = (struct brw_shader *)
142	 ctx->Shader.CurrentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT];
143
144      if (shader != NULL && shader->ir != NULL) {
145	 key->is_glsl = GL_TRUE;
146      }
147   }
148
149   /* _NEW_DEPTH */
150   key->stats_wm = intel->stats_wm;
151
152   /* _NEW_LINE */
153   key->line_stipple = ctx->Line.StippleFlag;
154
155   /* _NEW_POLYGON */
156   key->offset_enable = ctx->Polygon.OffsetFill;
157   key->offset_units = ctx->Polygon.OffsetUnits;
158   key->offset_factor = ctx->Polygon.OffsetFactor;
159}
160
161/**
162 * Setup wm hardware state.  See page 225 of Volume 2
163 */
164static drm_intel_bo *
165wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
166			drm_intel_bo **reloc_bufs)
167{
168   struct intel_context *intel = &brw->intel;
169   struct brw_wm_unit_state wm;
170   drm_intel_bo *bo;
171
172   memset(&wm, 0, sizeof(wm));
173
174   wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
175   wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
176   wm.thread1.depth_coef_urb_read_offset = 1;
177   wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
178
179   if (intel->gen == 5)
180      wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
181   else
182      wm.thread1.binding_table_entry_count = key->nr_surfaces;
183
184   if (key->total_scratch != 0) {
185      wm.thread2.scratch_space_base_pointer =
186	 brw->wm.scratch_bo->offset >> 10; /* reloc */
187      wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
188   } else {
189      wm.thread2.scratch_space_base_pointer = 0;
190      wm.thread2.per_thread_scratch_space = 0;
191   }
192
193   wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg;
194   wm.thread3.urb_entry_read_length = key->urb_entry_read_length;
195   wm.thread3.urb_entry_read_offset = 0;
196   wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
197   wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
198
199   if (intel->gen == 5)
200      wm.wm4.sampler_count = 0; /* hardware requirement */
201   else
202      wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
203
204   if (brw->wm.sampler_bo != NULL) {
205      /* reloc */
206      wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5;
207   } else {
208      wm.wm4.sampler_state_pointer = 0;
209   }
210
211   wm.wm5.program_uses_depth = key->uses_depth;
212   wm.wm5.program_computes_depth = key->computes_depth;
213   wm.wm5.program_uses_killpixel = key->uses_kill;
214
215   if (key->is_glsl)
216      wm.wm5.enable_8_pix = 1;
217   else
218      wm.wm5.enable_16_pix = 1;
219
220   wm.wm5.max_threads = brw->wm_max_threads - 1;
221
222   if (key->color_write_enable ||
223       key->uses_kill ||
224       key->computes_depth) {
225      wm.wm5.thread_dispatch_enable = 1;
226   }
227
228   wm.wm5.legacy_line_rast = 0;
229   wm.wm5.legacy_global_depth_bias = 0;
230   wm.wm5.early_depth_test = 1;	        /* never need to disable */
231   wm.wm5.line_aa_region_width = 0;
232   wm.wm5.line_endcap_aa_region_width = 1;
233
234   wm.wm5.polygon_stipple = key->polygon_stipple;
235
236   if (key->offset_enable) {
237      wm.wm5.depth_offset = 1;
238      /* Something wierd going on with legacy_global_depth_bias,
239       * offset_constant, scaling and MRD.  This value passes glean
240       * but gives some odd results elsewere (eg. the
241       * quad-offset-units test).
242       */
243      wm.global_depth_offset_constant = key->offset_units * 2;
244
245      /* This is the only value that passes glean:
246       */
247      wm.global_depth_offset_scale = key->offset_factor;
248   }
249
250   wm.wm5.line_stipple = key->line_stipple;
251
252   if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm)
253      wm.wm4.stats_enable = 1;
254
255   bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
256			 key, sizeof(*key),
257			 reloc_bufs, 3,
258			 &wm, sizeof(wm));
259
260   /* Emit WM program relocation */
261   drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread0),
262			   brw->wm.prog_bo, wm.thread0.grf_reg_count << 1,
263			   I915_GEM_DOMAIN_INSTRUCTION, 0);
264
265   /* Emit scratch space relocation */
266   if (key->total_scratch != 0) {
267      drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread2),
268			      brw->wm.scratch_bo,
269			      wm.thread2.per_thread_scratch_space,
270			      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
271   }
272
273   /* Emit sampler state relocation */
274   if (key->sampler_count != 0) {
275      drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, wm4),
276			      brw->wm.sampler_bo, (wm.wm4.stats_enable |
277						   (wm.wm4.sampler_count << 2)),
278			      I915_GEM_DOMAIN_INSTRUCTION, 0);
279   }
280
281   return bo;
282}
283
284
285static void upload_wm_unit( struct brw_context *brw )
286{
287   struct intel_context *intel = &brw->intel;
288   struct brw_wm_unit_key key;
289   drm_intel_bo *reloc_bufs[3];
290   wm_unit_populate_key(brw, &key);
291
292   /* Allocate the necessary scratch space if we haven't already.  Don't
293    * bother reducing the allocation later, since we use scratch so
294    * rarely.
295    */
296   assert(key.total_scratch <= 12 * 1024);
297   if (key.total_scratch) {
298      GLuint total = key.total_scratch * brw->wm_max_threads;
299
300      if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
301	 drm_intel_bo_unreference(brw->wm.scratch_bo);
302	 brw->wm.scratch_bo = NULL;
303      }
304      if (brw->wm.scratch_bo == NULL) {
305	 brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
306						 "wm scratch",
307						 total,
308						 4096);
309      }
310   }
311
312   reloc_bufs[0] = brw->wm.prog_bo;
313   reloc_bufs[1] = brw->wm.scratch_bo;
314   reloc_bufs[2] = brw->wm.sampler_bo;
315
316   drm_intel_bo_unreference(brw->wm.state_bo);
317   brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT,
318				       &key, sizeof(key),
319				       reloc_bufs, 3,
320				       NULL);
321   if (brw->wm.state_bo == NULL) {
322      brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs);
323   }
324}
325
326const struct brw_tracked_state brw_wm_unit = {
327   .dirty = {
328      .mesa = (_NEW_POLYGON |
329	       _NEW_POLYGONSTIPPLE |
330	       _NEW_LINE |
331	       _NEW_COLOR |
332	       _NEW_DEPTH |
333	       _NEW_BUFFERS),
334
335      .brw = (BRW_NEW_FRAGMENT_PROGRAM |
336	      BRW_NEW_CURBE_OFFSETS |
337	      BRW_NEW_DEPTH_BUFFER |
338	      BRW_NEW_NR_WM_SURFACES),
339
340      .cache = (CACHE_NEW_WM_PROG |
341		CACHE_NEW_SAMPLER)
342   },
343   .prepare = upload_wm_unit,
344};
345
346