brw_wm_state.c revision 834cc8e501c2632fd8f9fc78502a1a99803e6fb9
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33 34#include "brw_context.h" 35#include "brw_state.h" 36#include "brw_defines.h" 37#include "brw_wm.h" 38 39/*********************************************************************** 40 * WM unit - fragment programs and rasterization 41 */ 42 43struct brw_wm_unit_key { 44 unsigned int total_grf, total_scratch; 45 unsigned int urb_entry_read_length; 46 unsigned int curb_entry_read_length; 47 unsigned int dispatch_grf_start_reg; 48 49 unsigned int curbe_offset; 50 unsigned int urb_size; 51 52 unsigned int nr_surfaces, sampler_count; 53 GLboolean uses_depth, computes_depth, uses_kill, is_glsl; 54 GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable; 55 GLboolean color_write_enable; 56 GLfloat offset_units, offset_factor; 57}; 58 59bool 60brw_color_buffer_write_enabled(struct brw_context *brw) 61{ 62 struct gl_context *ctx = &brw->intel.ctx; 63 const struct gl_fragment_program *fp = brw->fragment_program; 64 int i; 65 66 /* _NEW_BUFFERS */ 67 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { 68 struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; 69 70 /* _NEW_COLOR */ 71 if (rb && 72 (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR) || 73 fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) && 74 (ctx->Color.ColorMask[i][0] || 75 ctx->Color.ColorMask[i][1] || 76 ctx->Color.ColorMask[i][2] || 77 ctx->Color.ColorMask[i][3])) { 78 return true; 79 } 80 } 81 82 return false; 83} 84 85static void 86wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) 87{ 88 struct gl_context *ctx = &brw->intel.ctx; 89 const struct gl_fragment_program *fp = brw->fragment_program; 90 struct intel_context *intel = &brw->intel; 91 92 memset(key, 0, sizeof(*key)); 93 94 /* CACHE_NEW_WM_PROG */ 95 key->total_grf = brw->wm.prog_data->total_grf; 96 key->urb_entry_read_length = brw->wm.prog_data->urb_read_length; 97 key->curb_entry_read_length = brw->wm.prog_data->curb_read_length; 98 key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; 99 key->total_scratch = brw->wm.prog_data->total_scratch; 100 101 /* BRW_NEW_URB_FENCE */ 102 key->urb_size = brw->urb.vsize; 103 104 /* BRW_NEW_CURBE_OFFSETS */ 105 key->curbe_offset = brw->curbe.wm_start; 106 107 /* BRW_NEW_NR_SURFACEs */ 108 key->nr_surfaces = brw->wm.nr_surfaces; 109 110 /* CACHE_NEW_SAMPLER */ 111 key->sampler_count = brw->wm.sampler_count; 112 113 /* _NEW_POLYGONSTIPPLE */ 114 key->polygon_stipple = ctx->Polygon.StippleFlag; 115 116 /* BRW_NEW_FRAGMENT_PROGRAM */ 117 key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; 118 119 /* as far as we can tell */ 120 key->computes_depth = 121 (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0; 122 /* BRW_NEW_DEPTH_BUFFER 123 * Override for NULL depthbuffer case, required by the Pixel Shader Computed 124 * Depth field. 125 */ 126 if (brw->state.depth_region == NULL) 127 key->computes_depth = 0; 128 129 /* _NEW_BUFFERS | _NEW_COLOR */ 130 key->color_write_enable = brw_color_buffer_write_enabled(brw); 131 132 /* _NEW_COLOR */ 133 key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; 134 135 /* If using the fragment shader backend, the program is always 136 * 8-wide. 137 */ 138 if (ctx->Shader.CurrentFragmentProgram) { 139 struct brw_shader *shader = (struct brw_shader *) 140 ctx->Shader.CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]; 141 142 if (shader != NULL && shader->ir != NULL) { 143 key->is_glsl = GL_TRUE; 144 } 145 } 146 147 /* _NEW_DEPTH */ 148 key->stats_wm = intel->stats_wm; 149 150 /* _NEW_LINE */ 151 key->line_stipple = ctx->Line.StippleFlag; 152 153 /* _NEW_POLYGON */ 154 key->offset_enable = ctx->Polygon.OffsetFill; 155 key->offset_units = ctx->Polygon.OffsetUnits; 156 key->offset_factor = ctx->Polygon.OffsetFactor; 157} 158 159/** 160 * Setup wm hardware state. See page 225 of Volume 2 161 */ 162static drm_intel_bo * 163wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, 164 drm_intel_bo **reloc_bufs) 165{ 166 struct intel_context *intel = &brw->intel; 167 struct brw_wm_unit_state wm; 168 drm_intel_bo *bo; 169 170 memset(&wm, 0, sizeof(wm)); 171 172 wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; 173 wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ 174 wm.thread1.depth_coef_urb_read_offset = 1; 175 wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; 176 177 if (intel->gen == 5) 178 wm.thread1.binding_table_entry_count = 0; /* hardware requirement */ 179 else 180 wm.thread1.binding_table_entry_count = key->nr_surfaces; 181 182 if (key->total_scratch != 0) { 183 wm.thread2.scratch_space_base_pointer = 184 brw->wm.scratch_bo->offset >> 10; /* reloc */ 185 wm.thread2.per_thread_scratch_space = ffs(key->total_scratch) - 11; 186 } else { 187 wm.thread2.scratch_space_base_pointer = 0; 188 wm.thread2.per_thread_scratch_space = 0; 189 } 190 191 wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg; 192 wm.thread3.urb_entry_read_length = key->urb_entry_read_length; 193 wm.thread3.urb_entry_read_offset = 0; 194 wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; 195 wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; 196 197 if (intel->gen == 5) 198 wm.wm4.sampler_count = 0; /* hardware requirement */ 199 else 200 wm.wm4.sampler_count = (key->sampler_count + 1) / 4; 201 202 if (brw->wm.sampler_bo != NULL) { 203 /* reloc */ 204 wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5; 205 } else { 206 wm.wm4.sampler_state_pointer = 0; 207 } 208 209 wm.wm5.program_uses_depth = key->uses_depth; 210 wm.wm5.program_computes_depth = key->computes_depth; 211 wm.wm5.program_uses_killpixel = key->uses_kill; 212 213 if (key->is_glsl) 214 wm.wm5.enable_8_pix = 1; 215 else 216 wm.wm5.enable_16_pix = 1; 217 218 wm.wm5.max_threads = brw->wm_max_threads - 1; 219 220 if (key->color_write_enable || 221 key->uses_kill || 222 key->computes_depth) { 223 wm.wm5.thread_dispatch_enable = 1; 224 } 225 226 wm.wm5.legacy_line_rast = 0; 227 wm.wm5.legacy_global_depth_bias = 0; 228 wm.wm5.early_depth_test = 1; /* never need to disable */ 229 wm.wm5.line_aa_region_width = 0; 230 wm.wm5.line_endcap_aa_region_width = 1; 231 232 wm.wm5.polygon_stipple = key->polygon_stipple; 233 234 if (key->offset_enable) { 235 wm.wm5.depth_offset = 1; 236 /* Something wierd going on with legacy_global_depth_bias, 237 * offset_constant, scaling and MRD. This value passes glean 238 * but gives some odd results elsewere (eg. the 239 * quad-offset-units test). 240 */ 241 wm.global_depth_offset_constant = key->offset_units * 2; 242 243 /* This is the only value that passes glean: 244 */ 245 wm.global_depth_offset_scale = key->offset_factor; 246 } 247 248 wm.wm5.line_stipple = key->line_stipple; 249 250 if (unlikely(INTEL_DEBUG & DEBUG_STATS) || key->stats_wm) 251 wm.wm4.stats_enable = 1; 252 253 bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT, 254 key, sizeof(*key), 255 reloc_bufs, 3, 256 &wm, sizeof(wm)); 257 258 /* Emit WM program relocation */ 259 drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread0), 260 brw->wm.prog_bo, wm.thread0.grf_reg_count << 1, 261 I915_GEM_DOMAIN_INSTRUCTION, 0); 262 263 /* Emit scratch space relocation */ 264 if (key->total_scratch != 0) { 265 drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread2), 266 brw->wm.scratch_bo, 267 wm.thread2.per_thread_scratch_space, 268 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); 269 } 270 271 /* Emit sampler state relocation */ 272 if (key->sampler_count != 0) { 273 drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, wm4), 274 brw->wm.sampler_bo, (wm.wm4.stats_enable | 275 (wm.wm4.sampler_count << 2)), 276 I915_GEM_DOMAIN_INSTRUCTION, 0); 277 } 278 279 return bo; 280} 281 282 283static void upload_wm_unit( struct brw_context *brw ) 284{ 285 struct intel_context *intel = &brw->intel; 286 struct brw_wm_unit_key key; 287 drm_intel_bo *reloc_bufs[3]; 288 wm_unit_populate_key(brw, &key); 289 290 /* Allocate the necessary scratch space if we haven't already. Don't 291 * bother reducing the allocation later, since we use scratch so 292 * rarely. 293 */ 294 if (key.total_scratch) { 295 GLuint total = key.total_scratch * brw->wm_max_threads; 296 297 if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) { 298 drm_intel_bo_unreference(brw->wm.scratch_bo); 299 brw->wm.scratch_bo = NULL; 300 } 301 if (brw->wm.scratch_bo == NULL) { 302 brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr, 303 "wm scratch", 304 total, 305 4096); 306 } 307 } 308 309 reloc_bufs[0] = brw->wm.prog_bo; 310 reloc_bufs[1] = brw->wm.scratch_bo; 311 reloc_bufs[2] = brw->wm.sampler_bo; 312 313 drm_intel_bo_unreference(brw->wm.state_bo); 314 brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT, 315 &key, sizeof(key), 316 reloc_bufs, 3, 317 NULL); 318 if (brw->wm.state_bo == NULL) { 319 brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs); 320 } 321} 322 323const struct brw_tracked_state brw_wm_unit = { 324 .dirty = { 325 .mesa = (_NEW_POLYGON | 326 _NEW_POLYGONSTIPPLE | 327 _NEW_LINE | 328 _NEW_COLOR | 329 _NEW_DEPTH | 330 _NEW_BUFFERS), 331 332 .brw = (BRW_NEW_FRAGMENT_PROGRAM | 333 BRW_NEW_CURBE_OFFSETS | 334 BRW_NEW_DEPTH_BUFFER | 335 BRW_NEW_NR_WM_SURFACES), 336 337 .cache = (CACHE_NEW_WM_PROG | 338 CACHE_NEW_SAMPLER) 339 }, 340 .prepare = upload_wm_unit, 341}; 342 343