brw_wm_state.c revision d0774eae304a9e109f2bdbb361fe0c1ef0cf6691
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33
34#include "brw_context.h"
35#include "brw_state.h"
36#include "brw_defines.h"
37#include "dri_bufmgr.h"
38#include "brw_wm.h"
39
40/***********************************************************************
41 * WM unit - fragment programs and rasterization
42 */
43
44struct brw_wm_unit_key {
45   unsigned int total_grf, total_scratch;
46   unsigned int urb_entry_read_length;
47   unsigned int curb_entry_read_length;
48   unsigned int dispatch_grf_start_reg;
49
50   unsigned int curbe_offset;
51   unsigned int urb_size;
52
53   unsigned int max_threads;
54
55   unsigned int nr_surfaces, sampler_count;
56   GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
57   GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
58   GLfloat offset_units, offset_factor;
59};
60
61static void
62wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
63{
64   const struct gl_fragment_program *fp = brw->fragment_program;
65   struct intel_context *intel = &brw->intel;
66
67   memset(key, 0, sizeof(*key));
68
69   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
70      key->max_threads = 1;
71   else
72      key->max_threads = 32;
73
74   /* CACHE_NEW_WM_PROG */
75   key->total_grf = brw->wm.prog_data->total_grf;
76   key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
77   key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
78   key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
79   key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024);
80
81   /* BRW_NEW_URB_FENCE */
82   key->urb_size = brw->urb.vsize;
83
84   /* BRW_NEW_CURBE_OFFSETS */
85   key->curbe_offset = brw->curbe.wm_start;
86
87   /* CACHE_NEW_SURFACE */
88   key->nr_surfaces = brw->wm.nr_surfaces;
89
90   /* CACHE_NEW_SAMPLER */
91   key->sampler_count = brw->wm.sampler_count;
92
93   /* _NEW_POLYGONSTIPPLE */
94   key->polygon_stipple = brw->attribs.Polygon->StippleFlag;
95
96   /* BRW_NEW_FRAGMENT_PROGRAM */
97   key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
98
99   /* as far as we can tell */
100   key->computes_depth =
101      (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) != 0;
102
103   /* _NEW_COLOR */
104   key->uses_kill = fp->UsesKill || brw->attribs.Color->AlphaEnabled;
105   key->is_glsl = brw_wm_is_glsl(fp);
106
107   /* XXX: This needs a flag to indicate when it changes. */
108   key->stats_wm = intel->stats_wm;
109
110   /* _NEW_LINE */
111   key->line_stipple = brw->attribs.Line->StippleFlag;
112
113   /* _NEW_POLYGON */
114   key->offset_enable = brw->attribs.Polygon->OffsetFill;
115   key->offset_units = brw->attribs.Polygon->OffsetUnits;
116   key->offset_factor = brw->attribs.Polygon->OffsetFactor;
117}
118
119static dri_bo *
120wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
121			dri_bo **reloc_bufs)
122{
123   struct brw_wm_unit_state wm;
124   dri_bo *bo;
125
126   memset(&wm, 0, sizeof(wm));
127
128   wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
129   wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
130   wm.thread1.depth_coef_urb_read_offset = 1;
131   wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
132   wm.thread1.binding_table_entry_count = key->nr_surfaces;
133
134   if (key->total_scratch != 0) {
135      wm.thread2.scratch_space_base_pointer =
136	 brw->wm.scratch_buffer->offset >> 10; /* reloc */
137      wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
138   } else {
139      wm.thread2.scratch_space_base_pointer = 0;
140      wm.thread2.per_thread_scratch_space = 0;
141   }
142
143   wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg;
144   wm.thread3.urb_entry_read_length = key->urb_entry_read_length;
145   wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
146   wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
147   wm.thread3.urb_entry_read_offset = 0;
148
149   wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
150   if (brw->wm.sampler_bo != NULL) {
151      /* reloc */
152      wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5;
153   } else {
154      wm.wm4.sampler_state_pointer = 0;
155   }
156
157   wm.wm5.program_uses_depth = key->uses_depth;
158   wm.wm5.program_computes_depth = key->computes_depth;
159   wm.wm5.program_uses_killpixel = key->uses_kill;
160
161   if (key->is_glsl)
162      wm.wm5.enable_8_pix = 1;
163   else
164      wm.wm5.enable_16_pix = 1;
165
166   wm.wm5.max_threads = key->max_threads - 1;
167   wm.wm5.thread_dispatch_enable = 1;	/* AKA: color_write */
168   wm.wm5.legacy_line_rast = 0;
169   wm.wm5.legacy_global_depth_bias = 0;
170   wm.wm5.early_depth_test = 1;	        /* never need to disable */
171   wm.wm5.line_aa_region_width = 0;
172   wm.wm5.line_endcap_aa_region_width = 1;
173
174   wm.wm5.polygon_stipple = key->polygon_stipple;
175
176   if (key->offset_enable) {
177      wm.wm5.depth_offset = 1;
178      /* Something wierd going on with legacy_global_depth_bias,
179       * offset_constant, scaling and MRD.  This value passes glean
180       * but gives some odd results elsewere (eg. the
181       * quad-offset-units test).
182       */
183      wm.global_depth_offset_constant = key->offset_units * 2;
184
185      /* This is the only value that passes glean:
186       */
187      wm.global_depth_offset_scale = key->offset_factor;
188   }
189
190   wm.wm5.line_stipple = key->line_stipple;
191
192   if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm)
193      wm.wm4.stats_enable = 1;
194
195   bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
196			 key, sizeof(*key),
197			 reloc_bufs, 3,
198			 &wm, sizeof(wm),
199			 NULL, NULL);
200
201   /* Emit WM program relocation */
202   dri_emit_reloc(bo,
203		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
204		  wm.thread0.grf_reg_count << 1,
205		  offsetof(struct brw_wm_unit_state, thread0),
206		  brw->wm.prog_bo);
207
208   /* Emit scratch space relocation */
209   if (key->total_scratch != 0) {
210      dri_emit_reloc(bo,
211		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
212		     wm.thread2.per_thread_scratch_space,
213		     offsetof(struct brw_wm_unit_state, thread2),
214		     brw->wm.scratch_buffer);
215   }
216
217   /* Emit sampler state relocation */
218   if (key->sampler_count != 0) {
219      dri_emit_reloc(bo,
220		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
221		     wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
222		     offsetof(struct brw_wm_unit_state, wm4),
223		     brw->wm.sampler_bo);
224   }
225
226   return bo;
227}
228
229
230static void upload_wm_unit( struct brw_context *brw )
231{
232   struct intel_context *intel = &brw->intel;
233   struct brw_wm_unit_key key;
234   dri_bo *reloc_bufs[3];
235
236   wm_unit_populate_key(brw, &key);
237
238   /* Allocate the necessary scratch space if we haven't already.  Don't
239    * bother reducing the allocation later, since we use scratch so
240    * rarely.
241    */
242   assert(key.total_scratch <= 12 * 1024);
243   if (key.total_scratch) {
244      GLuint total = key.total_scratch * key.max_threads;
245
246      if (brw->wm.scratch_buffer && total > brw->wm.scratch_buffer->size) {
247	 dri_bo_unreference(brw->wm.scratch_buffer);
248	 brw->wm.scratch_buffer = NULL;
249      }
250      if (brw->wm.scratch_buffer == NULL) {
251	 brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr,
252					       "wm scratch",
253					       total,
254					       4096, DRM_BO_FLAG_MEM_TT);
255      }
256   }
257
258   reloc_bufs[0] = brw->wm.prog_bo;
259   reloc_bufs[1] = brw->wm.scratch_buffer;
260   reloc_bufs[2] = brw->wm.sampler_bo;
261
262   dri_bo_unreference(brw->wm.state_bo);
263   brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT,
264				       &key, sizeof(key),
265				       reloc_bufs, 3,
266				       NULL);
267   if (brw->wm.state_bo == NULL) {
268      brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs);
269   }
270}
271
272const struct brw_tracked_state brw_wm_unit = {
273   .dirty = {
274      .mesa = (_NEW_POLYGON |
275	       _NEW_POLYGONSTIPPLE |
276	       _NEW_LINE |
277	       _NEW_COLOR),
278
279      .brw = (BRW_NEW_FRAGMENT_PROGRAM |
280	      BRW_NEW_CURBE_OFFSETS |
281	      BRW_NEW_LOCK),
282
283      .cache = (CACHE_NEW_SURFACE |
284		CACHE_NEW_WM_PROG |
285		CACHE_NEW_SAMPLER)
286   },
287   .update = upload_wm_unit,
288};
289
290