1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Jerome Glisse 25 */ 26 27#include "si_pipe.h" 28#include "radeon/r600_cs.h" 29 30static unsigned si_descriptor_list_cs_space(unsigned count, unsigned element_size) 31{ 32 /* Ensure we have enough space to start a new range in a hole */ 33 assert(element_size >= 3); 34 35 /* 5 dwords for possible load to reinitialize when we have no preamble 36 * IB + 5 dwords for write to L2 + 3 bytes for every range written to 37 * CE RAM. 38 */ 39 return 5 + 5 + 3 + count * element_size; 40} 41 42static unsigned si_ce_needed_cs_space(void) 43{ 44 unsigned space = 0; 45 46 space += si_descriptor_list_cs_space(SI_NUM_CONST_BUFFERS, 4); 47 space += si_descriptor_list_cs_space(SI_NUM_SHADER_BUFFERS, 4); 48 space += si_descriptor_list_cs_space(SI_NUM_SAMPLERS, 16); 49 space += si_descriptor_list_cs_space(SI_NUM_IMAGES, 8); 50 space *= SI_NUM_SHADERS; 51 52 space += si_descriptor_list_cs_space(SI_NUM_RW_BUFFERS, 4); 53 54 /* Increment CE counter packet */ 55 space += 2; 56 57 return space; 58} 59 60/* initialize */ 61void si_need_cs_space(struct si_context *ctx) 62{ 63 struct radeon_winsys_cs *cs = ctx->b.gfx.cs; 64 struct radeon_winsys_cs *ce_ib = ctx->ce_ib; 65 66 /* There is no need to flush the DMA IB here, because 67 * r600_need_dma_space always flushes the GFX IB if there is 68 * a conflict, which means any unflushed DMA commands automatically 69 * precede the GFX IB (= they had no dependency on the GFX IB when 70 * they were submitted). 71 */ 72 73 /* There are two memory usage counters in the winsys for all buffers 74 * that have been added (cs_add_buffer) and two counters in the pipe 75 * driver for those that haven't been added yet. 76 */ 77 if (unlikely(!radeon_cs_memory_below_limit(ctx->b.screen, ctx->b.gfx.cs, 78 ctx->b.vram, ctx->b.gtt))) { 79 ctx->b.gtt = 0; 80 ctx->b.vram = 0; 81 ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); 82 return; 83 } 84 ctx->b.gtt = 0; 85 ctx->b.vram = 0; 86 87 /* If the CS is sufficiently large, don't count the space needed 88 * and just flush if there is not enough space left. 89 */ 90 if (!ctx->b.ws->cs_check_space(cs, 2048) || 91 (ce_ib && !ctx->b.ws->cs_check_space(ce_ib, si_ce_needed_cs_space()))) 92 ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); 93} 94 95void si_context_gfx_flush(void *context, unsigned flags, 96 struct pipe_fence_handle **fence) 97{ 98 struct si_context *ctx = context; 99 struct radeon_winsys_cs *cs = ctx->b.gfx.cs; 100 struct radeon_winsys *ws = ctx->b.ws; 101 102 if (ctx->gfx_flush_in_progress) 103 return; 104 105 if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size)) 106 return; 107 108 if (r600_check_device_reset(&ctx->b)) 109 return; 110 111 /* If the state tracker is flushing the GFX IB, r600_flush_from_st is 112 * responsible for flushing the DMA IB and merging the fences from both. 113 * This code is only needed when the driver flushes the GFX IB 114 * internally, and it never asks for a fence handle. 115 */ 116 if (radeon_emitted(ctx->b.dma.cs, 0)) { 117 assert(fence == NULL); /* internal flushes only */ 118 ctx->b.dma.flush(ctx, flags, NULL); 119 } 120 121 ctx->gfx_flush_in_progress = true; 122 123 r600_preflush_suspend_features(&ctx->b); 124 125 ctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | 126 SI_CONTEXT_PS_PARTIAL_FLUSH; 127 128 /* DRM 3.1.0 doesn't flush TC for VI correctly. */ 129 if (ctx->b.chip_class == VI && ctx->b.screen->info.drm_minor <= 1) 130 ctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2 | 131 SI_CONTEXT_INV_VMEM_L1; 132 133 si_emit_cache_flush(ctx); 134 135 if (ctx->trace_buf) 136 si_trace_emit(ctx); 137 138 if (ctx->is_debug) { 139 /* Save the IB for debug contexts. */ 140 radeon_clear_saved_cs(&ctx->last_gfx); 141 radeon_save_cs(ws, cs, &ctx->last_gfx); 142 r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf); 143 r600_resource_reference(&ctx->trace_buf, NULL); 144 } 145 146 /* Flush the CS. */ 147 ws->cs_flush(cs, flags, &ctx->b.last_gfx_fence); 148 if (fence) 149 ws->fence_reference(fence, ctx->b.last_gfx_fence); 150 ctx->b.num_gfx_cs_flushes++; 151 152 /* Check VM faults if needed. */ 153 if (ctx->screen->b.debug_flags & DBG_CHECK_VM) { 154 /* Use conservative timeout 800ms, after which we won't wait any 155 * longer and assume the GPU is hung. 156 */ 157 ctx->b.ws->fence_wait(ctx->b.ws, ctx->b.last_gfx_fence, 800*1000*1000); 158 159 si_check_vm_faults(&ctx->b, &ctx->last_gfx, RING_GFX); 160 } 161 162 si_begin_new_cs(ctx); 163 ctx->gfx_flush_in_progress = false; 164} 165 166void si_begin_new_cs(struct si_context *ctx) 167{ 168 if (ctx->is_debug) { 169 uint32_t zero = 0; 170 171 /* Create a buffer used for writing trace IDs and initialize it to 0. */ 172 assert(!ctx->trace_buf); 173 ctx->trace_buf = (struct r600_resource*) 174 pipe_buffer_create(ctx->b.b.screen, 0, 175 PIPE_USAGE_STAGING, 4); 176 if (ctx->trace_buf) 177 pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b, 178 0, sizeof(zero), &zero); 179 ctx->trace_id = 0; 180 } 181 182 if (ctx->trace_buf) 183 si_trace_emit(ctx); 184 185 /* Flush read caches at the beginning of CS not flushed by the kernel. */ 186 if (ctx->b.chip_class >= CIK) 187 ctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 | 188 SI_CONTEXT_INV_ICACHE; 189 190 ctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS; 191 192 /* set all valid group as dirty so they get reemited on 193 * next draw command 194 */ 195 si_pm4_reset_emitted(ctx); 196 197 /* The CS initialization should be emitted before everything else. */ 198 si_pm4_emit(ctx, ctx->init_config); 199 if (ctx->init_config_gs_rings) 200 si_pm4_emit(ctx, ctx->init_config_gs_rings); 201 202 if (ctx->ce_preamble_ib) 203 si_ce_enable_loads(ctx->ce_preamble_ib); 204 else if (ctx->ce_ib) 205 si_ce_enable_loads(ctx->ce_ib); 206 207 if (ctx->ce_preamble_ib) 208 si_ce_reinitialize_all_descriptors(ctx); 209 210 ctx->framebuffer.dirty_cbufs = (1 << 8) - 1; 211 ctx->framebuffer.dirty_zsbuf = true; 212 si_mark_atom_dirty(ctx, &ctx->framebuffer.atom); 213 214 si_mark_atom_dirty(ctx, &ctx->clip_regs); 215 si_mark_atom_dirty(ctx, &ctx->clip_state.atom); 216 ctx->msaa_sample_locs.nr_samples = 0; 217 si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs.atom); 218 si_mark_atom_dirty(ctx, &ctx->msaa_config); 219 si_mark_atom_dirty(ctx, &ctx->sample_mask.atom); 220 si_mark_atom_dirty(ctx, &ctx->cb_render_state); 221 si_mark_atom_dirty(ctx, &ctx->blend_color.atom); 222 si_mark_atom_dirty(ctx, &ctx->db_render_state); 223 si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom); 224 si_mark_atom_dirty(ctx, &ctx->spi_map); 225 si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom); 226 si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom); 227 si_all_descriptors_begin_new_cs(ctx); 228 229 ctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1; 230 ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1; 231 ctx->b.viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1; 232 si_mark_atom_dirty(ctx, &ctx->b.scissors.atom); 233 si_mark_atom_dirty(ctx, &ctx->b.viewports.atom); 234 235 r600_postflush_resume_features(&ctx->b); 236 237 assert(!ctx->b.gfx.cs->prev_dw); 238 ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw; 239 240 /* Invalidate various draw states so that they are emitted before 241 * the first draw call. */ 242 si_invalidate_draw_sh_constants(ctx); 243 ctx->last_index_size = -1; 244 ctx->last_primitive_restart_en = -1; 245 ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN; 246 ctx->last_gs_out_prim = -1; 247 ctx->last_prim = -1; 248 ctx->last_multi_vgt_param = -1; 249 ctx->last_rast_prim = -1; 250 ctx->last_sc_line_stipple = ~0; 251 ctx->last_vtx_reuse_depth = -1; 252 ctx->emit_scratch_reloc = true; 253 ctx->last_ls = NULL; 254 ctx->last_tcs = NULL; 255 ctx->last_tes_sh_base = -1; 256 ctx->last_num_tcs_input_cp = -1; 257 258 ctx->cs_shader_state.initialized = false; 259} 260