/* genX_blorp_exec.c revision 7b035fd0c97939a65825f6e1b467b0d741382bc5 */
1/* 2 * Copyright © 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 */ 23 24#include <assert.h> 25 26#include "intel_batchbuffer.h" 27#include "intel_mipmap_tree.h" 28 29#include "brw_context.h" 30#include "brw_state.h" 31 32#include "blorp_priv.h" 33 34#include "genxml/gen_macros.h" 35 36static void * 37blorp_emit_dwords(struct brw_context *brw, unsigned n) 38{ 39 intel_batchbuffer_begin(brw, n, RENDER_RING); 40 uint32_t *map = brw->batch.map_next; 41 brw->batch.map_next += n; 42 intel_batchbuffer_advance(brw); 43 return map; 44} 45 46struct blorp_address { 47 drm_intel_bo *buffer; 48 uint32_t read_domains; 49 uint32_t write_domain; 50 uint32_t offset; 51}; 52 53static uint64_t 54blorp_emit_reloc(struct brw_context *brw, void *location, 55 struct blorp_address address, uint32_t delta) 56{ 57 uint32_t offset = (char *)location - (char *)brw->batch.map; 58 if (brw->gen >= 8) { 59 return intel_batchbuffer_reloc64(brw, address.buffer, offset, 60 address.read_domains, 61 address.write_domain, 62 address.offset + delta); 63 } else { 64 return intel_batchbuffer_reloc(brw, address.buffer, offset, 65 address.read_domains, 66 address.write_domain, 67 address.offset + delta); 68 } 69} 70 71#define __gen_address_type struct blorp_address 72#define __gen_user_data struct brw_context 73 74static uint64_t 75__gen_combine_address(struct brw_context *brw, void *location, 76 struct blorp_address address, uint32_t delta) 77{ 78 if (address.buffer == NULL) { 79 return address.offset + delta; 80 } else { 81 return blorp_emit_reloc(brw, location, address, delta); 82 } 83} 84 85#include "genxml/genX_pack.h" 86 87#define _blorp_cmd_length(cmd) cmd ## _length 88#define _blorp_cmd_header(cmd) cmd ## _header 89#define _blorp_cmd_pack(cmd) cmd ## _pack 90 91#define blorp_emit(brw, cmd, name) \ 92 for (struct cmd name = { _blorp_cmd_header(cmd) }, \ 93 *_dst = blorp_emit_dwords(brw, _blorp_cmd_length(cmd)); \ 94 __builtin_expect(_dst != NULL, 1); \ 95 _blorp_cmd_pack(cmd)(brw, (void *)_dst, &name), \ 96 _dst = NULL) 97 98static void 
99blorp_emit_sf_config(struct brw_context *brw, 100 const struct brw_blorp_params *params) 101{ 102 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; 103 104 /* 3DSTATE_SF 105 * 106 * Disable ViewportTransformEnable (dw2.1) 107 * 108 * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D 109 * Primitives Overview": 110 * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the 111 * use of screen- space coordinates). 112 * 113 * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3) 114 * and BackFaceFillMode (dw2.5:6) to SOLID(0). 115 * 116 * From the Sandy Bridge PRM, Volume 2, Part 1, Section 117 * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: 118 * SOLID: Any triangle or rectangle object found to be front-facing 119 * is rendered as a solid object. This setting is required when 120 * (rendering rectangle (RECTLIST) objects. 121 */ 122 blorp_emit(brw, GENX(3DSTATE_SF), sf) { 123 sf.FrontFaceFillMode = FILL_MODE_SOLID; 124 sf.BackFaceFillMode = FILL_MODE_SOLID; 125 126 sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ? 127 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL; 128 129 sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; 130 if (prog_data) { 131 sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs; 132 sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); 133 sf.ConstantInterpolationEnable = prog_data->flat_inputs; 134 } else { 135 sf.NumberofSFOutputAttributes = 0; 136 sf.VertexURBEntryReadLength = 1; 137 } 138 } 139} 140 141static void 142blorp_emit_wm_config(struct brw_context *brw, 143 const struct brw_blorp_params *params) 144{ 145 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; 146 147 /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be 148 * nonzero to prevent the GPU from hanging. 
While the documentation doesn't 149 * mention this explicitly, it notes that the valid range for the field is 150 * [1,39] = [2,40] threads, which excludes zero. 151 * 152 * To be safe (and to minimize extraneous code) we go ahead and fully 153 * configure the WM state whether or not there is a WM program. 154 */ 155 blorp_emit(brw, GENX(3DSTATE_WM), wm) { 156 wm.MaximumNumberofThreads = brw->max_wm_threads - 1; 157 158 switch (params->hiz_op) { 159 case GEN6_HIZ_OP_DEPTH_CLEAR: 160 wm.DepthBufferClear = true; 161 break; 162 case GEN6_HIZ_OP_DEPTH_RESOLVE: 163 wm.DepthBufferResolveEnable = true; 164 break; 165 case GEN6_HIZ_OP_HIZ_RESOLVE: 166 wm.HierarchicalDepthBufferResolveEnable = true; 167 break; 168 case GEN6_HIZ_OP_NONE: 169 break; 170 default: 171 unreachable("not reached"); 172 } 173 174 if (prog_data) { 175 wm.ThreadDispatchEnable = true; 176 177 wm.DispatchGRFStartRegisterforConstantSetupData0 = 178 prog_data->first_curbe_grf_0; 179 wm.DispatchGRFStartRegisterforConstantSetupData2 = 180 prog_data->first_curbe_grf_2; 181 182 wm.KernelStartPointer0 = params->wm_prog_kernel; 183 wm.KernelStartPointer2 = 184 params->wm_prog_kernel + prog_data->ksp_offset_2; 185 186 wm._8PixelDispatchEnable = prog_data->dispatch_8; 187 wm._16PixelDispatchEnable = prog_data->dispatch_16; 188 189 wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs; 190 } 191 192 if (params->src.bo) { 193 wm.SamplerCount = 1; /* Up to 4 samplers */ 194 wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */ 195 } 196 197 if (params->dst.surf.samples > 1) { 198 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; 199 wm.MultisampleDispatchMode = 200 (prog_data && prog_data->persample_msaa_dispatch) ? 
201 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL; 202 } else { 203 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; 204 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; 205 } 206 } 207} 208 209 210static void 211blorp_emit_depth_stencil_config(struct brw_context *brw, 212 const struct brw_blorp_params *params) 213{ 214 brw_emit_depth_stall_flushes(brw); 215 216 blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) { 217 switch (params->depth.surf.dim) { 218 case ISL_SURF_DIM_1D: 219 db.SurfaceType = SURFTYPE_1D; 220 break; 221 case ISL_SURF_DIM_2D: 222 db.SurfaceType = SURFTYPE_2D; 223 break; 224 case ISL_SURF_DIM_3D: 225 db.SurfaceType = SURFTYPE_3D; 226 break; 227 } 228 229 db.SurfaceFormat = params->depth_format; 230 231 db.TiledSurface = true; 232 db.TileWalk = TILEWALK_YMAJOR; 233 db.MIPMapLayoutMode = MIPLAYOUT_BELOW; 234 235 db.HierarchicalDepthBufferEnable = true; 236 db.SeparateStencilBufferEnable = true; 237 238 db.Width = params->depth.surf.logical_level0_px.width - 1; 239 db.Height = params->depth.surf.logical_level0_px.height - 1; 240 db.RenderTargetViewExtent = db.Depth = 241 MAX2(params->depth.surf.logical_level0_px.depth, 242 params->depth.surf.logical_level0_px.array_len) - 1; 243 244 db.LOD = params->depth.view.base_level; 245 db.MinimumArrayElement = params->depth.view.base_array_layer; 246 247 db.SurfacePitch = params->depth.surf.row_pitch - 1; 248 db.SurfaceBaseAddress = (struct blorp_address) { 249 .buffer = params->depth.bo, 250 .read_domains = I915_GEM_DOMAIN_RENDER, 251 .write_domain = I915_GEM_DOMAIN_RENDER, 252 .offset = params->depth.offset, 253 }; 254 } 255 256 blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) { 257 hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1; 258 hiz.SurfaceBaseAddress = (struct blorp_address) { 259 .buffer = params->depth.aux_bo, 260 .read_domains = I915_GEM_DOMAIN_RENDER, 261 .write_domain = I915_GEM_DOMAIN_RENDER, 262 .offset = params->depth.aux_offset, 263 }; 264 } 265 266 blorp_emit(brw, 
GENX(3DSTATE_STENCIL_BUFFER), sb); 267} 268 269static uint32_t 270blorp_emit_blend_state(struct brw_context *brw, 271 const struct brw_blorp_params *params) 272{ 273 struct GENX(BLEND_STATE) blend; 274 memset(&blend, 0, sizeof(blend)); 275 276 for (unsigned i = 0; i < params->num_draw_buffers; ++i) { 277 blend.Entry[i].PreBlendColorClampEnable = true; 278 blend.Entry[i].PostBlendColorClampEnable = true; 279 blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT; 280 281 blend.Entry[i].WriteDisableRed = params->color_write_disable[0]; 282 blend.Entry[i].WriteDisableGreen = params->color_write_disable[1]; 283 blend.Entry[i].WriteDisableBlue = params->color_write_disable[2]; 284 blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3]; 285 } 286 287 uint32_t offset; 288 void *state = brw_state_batch(brw, AUB_TRACE_BLEND_STATE, 289 GENX(BLEND_STATE_length) * 4, 64, &offset); 290 GENX(BLEND_STATE_pack)(NULL, state, &blend); 291 292 return offset; 293} 294 295static uint32_t 296blorp_emit_color_calc_state(struct brw_context *brw, 297 const struct brw_blorp_params *params) 298{ 299 uint32_t offset; 300 void *state = brw_state_batch(brw, AUB_TRACE_CC_STATE, 301 GENX(COLOR_CALC_STATE_length) * 4, 64, &offset); 302 memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4); 303 304 return offset; 305} 306 307static uint32_t 308blorp_emit_depth_stencil_state(struct brw_context *brw, 309 const struct brw_blorp_params *params) 310{ 311 /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: 312 * - 7.5.3.1 Depth Buffer Clear 313 * - 7.5.3.2 Depth Buffer Resolve 314 * - 7.5.3.3 Hierarchical Depth Buffer Resolve 315 */ 316 struct GENX(DEPTH_STENCIL_STATE) ds = { 317 .DepthBufferWriteEnable = true, 318 }; 319 320 if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) { 321 ds.DepthTestEnable = true; 322 ds.DepthTestFunction = COMPAREFUNCTION_NEVER; 323 } 324 325 uint32_t offset; 326 void *state = brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, 327 
GENX(DEPTH_STENCIL_STATE_length) * 4, 64, 328 &offset); 329 GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds); 330 331 return offset; 332} 333 334static void 335blorp_emit_surface_states(struct brw_context *brw, 336 const struct brw_blorp_params *params) 337{ 338 uint32_t bind_offset; 339 uint32_t *bind = 340 brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, 341 sizeof(uint32_t) * BRW_BLORP_NUM_BINDING_TABLE_ENTRIES, 342 32, /* alignment */ &bind_offset); 343 344 bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] = 345 brw_blorp_emit_surface_state(brw, ¶ms->dst, 346 I915_GEM_DOMAIN_RENDER, 347 I915_GEM_DOMAIN_RENDER, true); 348 if (params->src.bo) { 349 bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] = 350 brw_blorp_emit_surface_state(brw, ¶ms->src, 351 I915_GEM_DOMAIN_SAMPLER, 0, false); 352 } 353 354 blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) { 355 bt.PSBindingTableChange = true; 356 bt.PointertoPSBindingTable = bind_offset; 357 } 358} 359 360static void 361blorp_emit_sampler_state(struct brw_context *brw, 362 const struct brw_blorp_params *params) 363{ 364 struct GENX(SAMPLER_STATE) sampler = { 365 .MipModeFilter = MIPFILTER_NONE, 366 .MagModeFilter = MAPFILTER_LINEAR, 367 .MinModeFilter = MAPFILTER_LINEAR, 368 .MinLOD = 0, 369 .MaxLOD = 0, 370 .TCXAddressControlMode = TCM_CLAMP, 371 .TCYAddressControlMode = TCM_CLAMP, 372 .TCZAddressControlMode = TCM_CLAMP, 373 .MaximumAnisotropy = RATIO21, 374 .RAddressMinFilterRoundingEnable = true, 375 .RAddressMagFilterRoundingEnable = true, 376 .VAddressMinFilterRoundingEnable = true, 377 .VAddressMagFilterRoundingEnable = true, 378 .UAddressMinFilterRoundingEnable = true, 379 .UAddressMagFilterRoundingEnable = true, 380 .NonnormalizedCoordinateEnable = true, 381 }; 382 383 uint32_t offset; 384 void *state = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, 385 GENX(SAMPLER_STATE_length) * 4, 32, &offset); 386 GENX(SAMPLER_STATE_pack)(NULL, state, &sampler); 387 388 blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS), 
ssp) { 389 ssp.VSSamplerStateChange = true; 390 ssp.GSSamplerStateChange = true; 391 ssp.PSSamplerStateChange = true; 392 ssp.PointertoPSSamplerState = offset; 393 } 394} 395 396/* 3DSTATE_VIEWPORT_STATE_POINTERS */ 397static void 398blorp_emit_viewport_state(struct brw_context *brw, 399 const struct brw_blorp_params *params) 400{ 401 uint32_t cc_vp_offset; 402 403 void *state = brw_state_batch(brw, AUB_TRACE_CC_VP_STATE, 404 GENX(CC_VIEWPORT_length) * 4, 32, 405 &cc_vp_offset); 406 407 GENX(CC_VIEWPORT_pack)(brw, state, 408 &(struct GENX(CC_VIEWPORT)) { 409 .MinimumDepth = 0.0, 410 .MaximumDepth = 1.0, 411 }); 412 413 blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) { 414 vsp.CCViewportStateChange = true; 415 vsp.PointertoCC_VIEWPORT = cc_vp_offset; 416 } 417} 418 419 420/** 421 * \brief Execute a blit or render pass operation. 422 * 423 * To execute the operation, this function manually constructs and emits a 424 * batch to draw a rectangle primitive. The batchbuffer is flushed before 425 * constructing and after emitting the batch. 426 * 427 * This function alters no GL state. 428 */ 429void 430genX(blorp_exec)(struct brw_context *brw, 431 const struct brw_blorp_params *params) 432{ 433 uint32_t blend_state_offset = 0; 434 uint32_t color_calc_state_offset = 0; 435 uint32_t depth_stencil_state_offset; 436 437 /* Emit workaround flushes when we switch from drawing to blorping. */ 438 brw_emit_post_sync_nonzero_flush(brw); 439 440 brw_upload_state_base_address(brw); 441 442 gen6_blorp_emit_vertices(brw, params); 443 444 /* 3DSTATE_URB 445 * 446 * Assign the entire URB to the VS. Even though the VS disabled, URB space 447 * is still needed because the clipper loads the VUE's from the URB. From 448 * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE, 449 * Dword 1.15:0 "VS Number of URB Entries": 450 * This field is always used (even if VS Function Enable is DISABLED). 
451 * 452 * The warning below appears in the PRM (Section 3DSTATE_URB), but we can 453 * safely ignore it because this batch contains only one draw call. 454 * Because of URB corruption caused by allocating a previous GS unit 455 * URB entry to the VS unit, software is required to send a “GS NULL 456 * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0) 457 * plus a dummy DRAW call before any case where VS will be taking over 458 * GS URB space. 459 */ 460 blorp_emit(brw, GENX(3DSTATE_URB), urb) { 461 urb.VSNumberofURBEntries = brw->urb.max_vs_entries; 462 } 463 464 if (params->wm_prog_data) { 465 blend_state_offset = blorp_emit_blend_state(brw, params); 466 color_calc_state_offset = blorp_emit_color_calc_state(brw, params); 467 } 468 depth_stencil_state_offset = blorp_emit_depth_stencil_state(brw, params); 469 470 /* 3DSTATE_CC_STATE_POINTERS 471 * 472 * The pointer offsets are relative to 473 * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. 474 * 475 * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE. 
476 */ 477 blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), cc) { 478 cc.BLEND_STATEChange = true; 479 cc.COLOR_CALC_STATEChange = true; 480 cc.DEPTH_STENCIL_STATEChange = true; 481 cc.PointertoBLEND_STATE = blend_state_offset; 482 cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset; 483 cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset; 484 } 485 486 blorp_emit(brw, GENX(3DSTATE_CONSTANT_VS), vs); 487 blorp_emit(brw, GENX(3DSTATE_CONSTANT_GS), gs); 488 blorp_emit(brw, GENX(3DSTATE_CONSTANT_PS), ps); 489 490 if (params->wm_prog_data) 491 blorp_emit_surface_states(brw, params); 492 493 if (params->src.bo) 494 blorp_emit_sampler_state(brw, params); 495 496 gen6_emit_3dstate_multisample(brw, params->dst.surf.samples); 497 498 blorp_emit(brw, GENX(3DSTATE_SAMPLE_MASK), mask) { 499 mask.SampleMask = (1 << params->dst.surf.samples) - 1; 500 } 501 502 /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State, 503 * 3DSTATE_VS, Dword 5.0 "VS Function Enable": 504 * 505 * [DevSNB] A pipeline flush must be programmed prior to a 506 * 3DSTATE_VS command that causes the VS Function Enable to 507 * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL 508 * command with CS stall bit set and a post sync operation. 509 * 510 * We've already done one at the start of the BLORP operation. 
511 */ 512 blorp_emit(brw, GENX(3DSTATE_VS), vs); 513 blorp_emit(brw, GENX(3DSTATE_GS), gs); 514 515 blorp_emit(brw, GENX(3DSTATE_CLIP), clip) { 516 clip.PerspectiveDivideDisable = true; 517 } 518 519 blorp_emit_sf_config(brw, params); 520 blorp_emit_wm_config(brw, params); 521 522 blorp_emit_viewport_state(brw, params); 523 524 if (params->depth.bo) { 525 blorp_emit_depth_stencil_config(brw, params); 526 } else { 527 brw_emit_depth_stall_flushes(brw); 528 529 blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) { 530 db.SurfaceType = SURFTYPE_NULL; 531 db.SurfaceFormat = D32_FLOAT; 532 } 533 blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz); 534 blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb); 535 } 536 537 /* 3DSTATE_CLEAR_PARAMS 538 * 539 * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS: 540 * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE 541 * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes. 542 */ 543 blorp_emit(brw, GENX(3DSTATE_CLEAR_PARAMS), clear) { 544 clear.DepthClearValueValid = true; 545 clear.DepthClearValue = params->depth.clear_color.u32[0]; 546 } 547 548 blorp_emit(brw, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { 549 rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1; 550 rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1; 551 } 552 553 blorp_emit(brw, GENX(3DPRIMITIVE), prim) { 554 prim.VertexAccessType = SEQUENTIAL; 555 prim.PrimitiveTopologyType = _3DPRIM_RECTLIST; 556 prim.VertexCountPerInstance = 3; 557 prim.InstanceCount = params->num_layers; 558 } 559} 560