r300_hyperz.c revision ebf69f2c508e2cb6df1437ef0e31cbc8b808824d
1/* 2 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> 3 * Copyright 2009 Marek Olšák <maraeo@gmail.com> 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */ 23 24#include "r300_context.h" 25#include "r300_reg.h" 26#include "r300_fs.h" 27#include "r300_winsys.h" 28 29#include "util/u_format.h" 30#include "util/u_mm.h" 31 32/* 33 HiZ rules - taken from various docs 34 1. HiZ only works on depth values 35 2. Cannot HiZ if stencil fail or zfail is !KEEP 36 3. on R300/400, HiZ is disabled if depth test is EQUAL 37 4. comparison changes without clears usually mean disabling HiZ 38*/ 39/*****************************************************************************/ 40/* The HyperZ setup */ 41/*****************************************************************************/ 42 43static enum r300_hiz_func r300_get_hiz_func(struct r300_context *r300) 44{ 45 struct r300_dsa_state *dsa = r300->dsa_state.state; 46 47 if (!dsa->dsa.depth.enabled || !dsa->dsa.depth.writemask) 48 return HIZ_FUNC_NONE; 49 50 switch (dsa->dsa.depth.func) { 51 case PIPE_FUNC_NEVER: 52 case PIPE_FUNC_EQUAL: 53 case PIPE_FUNC_NOTEQUAL: 54 case PIPE_FUNC_ALWAYS: 55 return HIZ_FUNC_NONE; 56 57 case PIPE_FUNC_LESS: 58 case PIPE_FUNC_LEQUAL: 59 return HIZ_FUNC_MAX; 60 61 case PIPE_FUNC_GREATER: 62 case PIPE_FUNC_GEQUAL: 63 return HIZ_FUNC_MIN; 64 } 65} 66 67/* Return what's used for the depth test (either minimum or maximum). */ 68static unsigned r300_get_sc_hz_max(struct r300_context *r300) 69{ 70 struct r300_dsa_state *dsa = r300->dsa_state.state; 71 unsigned func = dsa->dsa.depth.func; 72 73 return func >= PIPE_FUNC_GREATER ? R300_SC_HYPERZ_MAX : R300_SC_HYPERZ_MIN; 74} 75 76static boolean r300_is_hiz_func_valid(struct r300_context *r300) 77{ 78 struct r300_dsa_state *dsa = r300->dsa_state.state; 79 unsigned func = dsa->dsa.depth.func; 80 81 if (r300->hiz_func == HIZ_FUNC_NONE) 82 return TRUE; 83 84 /* func1 is less/lessthan */ 85 if (r300->hiz_func == HIZ_FUNC_MAX && 86 (func == PIPE_FUNC_GEQUAL || func == PIPE_FUNC_GREATER)) 87 return FALSE; 88 89 /* func1 is greater/greaterthan */ 90 if (r300->hiz_func == HIZ_FUNC_MIN && 91 (func == PIPE_FUNC_LESS || func == PIPE_FUNC_LEQUAL)) 92 return FALSE; 93 94 return TRUE; 95} 96 97static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s) 98{ 99 return s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || 100 s->zfail_op != PIPE_STENCIL_OP_KEEP); 101} 102 103static boolean r300_can_hiz(struct r300_context *r300) 104{ 105 struct r300_dsa_state *dsa = r300->dsa_state.state; 106 struct r300_screen *r300screen = r300->screen; 107 108 /* shader writes depth - no HiZ */ 109 if (r300_fragment_shader_writes_depth(r300_fs(r300))) /* (5) */ 110 return FALSE; 111 112 if (r300->query_current) 113 return FALSE; 114 115 /* if stencil fail/zfail op is not KEEP */ 116 if (r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[0]) || 117 r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[1])) 118 return FALSE; 119 120 if (dsa->dsa.depth.enabled) { 121 /* if depth func is EQUAL pre-r500 */ 122 if (dsa->dsa.depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) 123 return FALSE; 124 125 /* if depth func is NOTEQUAL */ 126 if (dsa->dsa.depth.func == PIPE_FUNC_NOTEQUAL) 127 return FALSE; 128 } 129 return TRUE; 130} 131 132static void r300_update_hyperz(struct r300_context* r300) 133{ 134 struct r300_hyperz_state *z = 135 (struct r300_hyperz_state*)r300->hyperz_state.state; 136 struct pipe_framebuffer_state *fb = 137 (struct pipe_framebuffer_state*)r300->fb_state.state; 138 struct r300_resource *zstex = 139 fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL; 140 141 z->gb_z_peq_config = 0; 142 z->zb_bw_cntl = 0; 143 z->sc_hyperz = R300_SC_HYPERZ_ADJ_2; 144 z->flush = 0; 145 146 if (r300->cbzb_clear) { 147 z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY; 148 return; 149 } 150 151 if (!zstex || 152 !r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) 153 return; 154 155 /* Zbuffer compression. */ 156 if (r300->zmask_in_use && !r300->hyperz_locked) { 157 z->zb_bw_cntl |= R300_FAST_FILL_ENABLE | 158 /*R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE |*/ 159 R300_RD_COMP_ENABLE; 160 161 if (!r300->zmask_decompress) { 162 z->zb_bw_cntl |= R300_WR_COMP_ENABLE; 163 } 164 } 165 166 if (zstex->tex.zcomp8x8[fb->zsbuf->u.tex.level]) { 167 z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; 168 } 169 170 /* HiZ. */ 171 if (r300->hiz_in_use && !r300->hyperz_locked) { 172 /* Set the HiZ function if needed. */ 173 if (r300->hiz_func == HIZ_FUNC_NONE) { 174 r300->hiz_func = r300_get_hiz_func(r300); 175 } 176 177 /* If the depth function is inverted, HiZ must be disabled. */ 178 if (!r300_is_hiz_func_valid(r300)) { 179 r300->hiz_in_use = FALSE; 180 } else if (r300_can_hiz(r300)) { 181 /* Setup the HiZ bits. */ 182 z->zb_bw_cntl |= 183 R300_HIZ_ENABLE | 184 (r300->hiz_func == HIZ_FUNC_MIN ? R300_HIZ_MIN : R300_HIZ_MAX); 185 186 z->sc_hyperz |= R300_SC_HYPERZ_ENABLE | 187 r300_get_sc_hz_max(r300); 188 189 if (r300->screen->caps.is_r500) { 190 z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3 | 191 R500_HIZ_EQUAL_REJECT_ENABLE; 192 } 193 } 194 } 195 196 /* R500-specific features and optimizations. */ 197 if (r300->screen->caps.is_r500) { 198 z->zb_bw_cntl |= R500_PEQ_PACKING_ENABLE | 199 R500_COVERED_PTR_MASKING_ENABLE; 200 } 201} 202 203/*****************************************************************************/ 204/* The ZTOP state */ 205/*****************************************************************************/ 206 207static boolean r300_dsa_writes_stencil( 208 struct pipe_stencil_state *s) 209{ 210 return s->enabled && s->writemask && 211 (s->fail_op != PIPE_STENCIL_OP_KEEP || 212 s->zfail_op != PIPE_STENCIL_OP_KEEP || 213 s->zpass_op != PIPE_STENCIL_OP_KEEP); 214} 215 216static boolean r300_dsa_writes_depth_stencil( 217 struct pipe_depth_stencil_alpha_state *dsa) 218{ 219 /* We are interested only in the cases when a depth or stencil value 220 * can be changed. */ 221 222 if (dsa->depth.enabled && dsa->depth.writemask && 223 dsa->depth.func != PIPE_FUNC_NEVER) 224 return TRUE; 225 226 if (r300_dsa_writes_stencil(&dsa->stencil[0]) || 227 r300_dsa_writes_stencil(&dsa->stencil[1])) 228 return TRUE; 229 230 return FALSE; 231} 232 233static boolean r300_dsa_alpha_test_enabled( 234 struct pipe_depth_stencil_alpha_state *dsa) 235{ 236 /* We are interested only in the cases when alpha testing can kill 237 * a fragment. */ 238 239 return dsa->alpha.enabled && dsa->alpha.func != PIPE_FUNC_ALWAYS; 240} 241 242static void r300_update_ztop(struct r300_context* r300) 243{ 244 struct r300_ztop_state* ztop_state = 245 (struct r300_ztop_state*)r300->ztop_state.state; 246 uint32_t old_ztop = ztop_state->z_buffer_top; 247 248 /* This is important enough that I felt it warranted a comment. 249 * 250 * According to the docs, these are the conditions where ZTOP must be 251 * disabled: 252 * 1) Alpha testing enabled 253 * 2) Texture kill instructions in fragment shader 254 * 3) Chroma key culling enabled 255 * 4) W-buffering enabled 256 * 257 * The docs claim that for the first three cases, if no ZS writes happen, 258 * then ZTOP can be used. 259 * 260 * (3) will never apply since we do not support chroma-keyed operations. 261 * (4) will need to be re-examined (and this comment updated) if/when 262 * Hyper-Z becomes supported. 263 * 264 * Additionally, the following conditions require disabled ZTOP: 265 * 5) Depth writes in fragment shader 266 * 6) Outstanding occlusion queries 267 * 268 * This register causes stalls all the way from SC to CB when changed, 269 * but it is buffered on-chip so it does not hurt to write it if it has 270 * not changed. 271 * 272 * ~C. 273 */ 274 275 /* ZS writes */ 276 if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && 277 (r300_dsa_alpha_test_enabled(r300->dsa_state.state) || /* (1) */ 278 r300_fs(r300)->shader->info.uses_kill)) { /* (2) */ 279 ztop_state->z_buffer_top = R300_ZTOP_DISABLE; 280 } else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */ 281 ztop_state->z_buffer_top = R300_ZTOP_DISABLE; 282 } else if (r300->query_current) { /* (6) */ 283 ztop_state->z_buffer_top = R300_ZTOP_DISABLE; 284 } else { 285 ztop_state->z_buffer_top = R300_ZTOP_ENABLE; 286 } 287 if (ztop_state->z_buffer_top != old_ztop) 288 r300_mark_atom_dirty(r300, &r300->ztop_state); 289} 290 291void r300_update_hyperz_state(struct r300_context* r300) 292{ 293 r300_update_ztop(r300); 294 295 if (r300->hyperz_state.dirty) { 296 r300_update_hyperz(r300); 297 } 298} 299