/* genX_blorp_exec.c revision ac08bc8ac220f22333536a9f881fde1e5607148e */
1/* 2 * Copyright © 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 */ 23 24#include <assert.h> 25 26#include "intel_batchbuffer.h" 27#include "intel_mipmap_tree.h" 28 29#include "brw_context.h" 30#include "brw_state.h" 31 32#include "blorp_priv.h" 33 34#include "genxml/gen_macros.h" 35 36static void * 37blorp_emit_dwords(struct brw_context *brw, unsigned n) 38{ 39 intel_batchbuffer_begin(brw, n, RENDER_RING); 40 uint32_t *map = brw->batch.map_next; 41 brw->batch.map_next += n; 42 intel_batchbuffer_advance(brw); 43 return map; 44} 45 46static uint64_t 47blorp_emit_reloc(struct brw_context *brw, void *location, 48 struct blorp_address address, uint32_t delta) 49{ 50 uint32_t offset = (char *)location - (char *)brw->batch.map; 51 if (brw->gen >= 8) { 52 return intel_batchbuffer_reloc64(brw, address.buffer, offset, 53 address.read_domains, 54 address.write_domain, 55 address.offset + delta); 56 } else { 57 return intel_batchbuffer_reloc(brw, address.buffer, offset, 58 address.read_domains, 59 address.write_domain, 60 address.offset + delta); 61 } 62} 63 64static void * 65blorp_alloc_dynamic_state(struct blorp_context *blorp, 66 enum aub_state_struct_type type, 67 uint32_t size, 68 uint32_t alignment, 69 uint32_t *offset) 70{ 71 struct brw_context *brw = blorp->driver_ctx; 72 return brw_state_batch(brw, type, size, alignment, offset); 73} 74 75static void * 76blorp_alloc_vertex_buffer(struct blorp_context *blorp, uint32_t size, 77 struct blorp_address *addr) 78{ 79 struct brw_context *brw = blorp->driver_ctx; 80 81 uint32_t offset; 82 void *data = brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER, 83 size, 32, &offset); 84 85 *addr = (struct blorp_address) { 86 .buffer = brw->batch.bo, 87 .read_domains = I915_GEM_DOMAIN_VERTEX, 88 .write_domain = 0, 89 .offset = offset, 90 }; 91 92 return data; 93} 94 95static void 96blorp_emit_urb_config(struct brw_context *brw, unsigned vs_entry_size) 97{ 98#if GEN_GEN >= 7 99 if (!(brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) && 100 brw->urb.vsize >= vs_entry_size) 101 return; 102 
103 brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE; 104 105 gen7_upload_urb(brw, vs_entry_size, false, false); 106#else 107 gen6_upload_urb(brw, vs_entry_size, false, 0); 108#endif 109} 110 111static void 112blorp_emit_3dstate_multisample(struct brw_context *brw, unsigned samples) 113{ 114#if GEN_GEN >= 8 115 gen8_emit_3dstate_multisample(brw, samples); 116#else 117 gen6_emit_3dstate_multisample(brw, samples); 118#endif 119} 120 121#define __gen_address_type struct blorp_address 122#define __gen_user_data struct brw_context 123 124static uint64_t 125__gen_combine_address(struct brw_context *brw, void *location, 126 struct blorp_address address, uint32_t delta) 127{ 128 if (address.buffer == NULL) { 129 return address.offset + delta; 130 } else { 131 return blorp_emit_reloc(brw, location, address, delta); 132 } 133} 134 135#include "genxml/genX_pack.h" 136 137#define _blorp_cmd_length(cmd) cmd ## _length 138#define _blorp_cmd_length_bias(cmd) cmd ## _length_bias 139#define _blorp_cmd_header(cmd) cmd ## _header 140#define _blorp_cmd_pack(cmd) cmd ## _pack 141 142#define blorp_emit(brw, cmd, name) \ 143 for (struct cmd name = { _blorp_cmd_header(cmd) }, \ 144 *_dst = blorp_emit_dwords(brw, _blorp_cmd_length(cmd)); \ 145 __builtin_expect(_dst != NULL, 1); \ 146 _blorp_cmd_pack(cmd)(brw, (void *)_dst, &name), \ 147 _dst = NULL) 148 149#define blorp_emitn(batch, cmd, n) ({ \ 150 uint32_t *_dw = blorp_emit_dwords(batch, n); \ 151 struct cmd template = { \ 152 _blorp_cmd_header(cmd), \ 153 .DWordLength = n - _blorp_cmd_length_bias(cmd), \ 154 }; \ 155 _blorp_cmd_pack(cmd)(batch, _dw, &template); \ 156 _dw + 1; /* Array starts at dw[1] */ \ 157 }) 158 159/* Once vertex fetcher has written full VUE entries with complete 160 * header the space requirement is as follows per vertex (in bytes): 161 * 162 * Header Position Program constants 163 * +--------+------------+-------------------+ 164 * | 16 | 16 | n x 16 | 165 * +--------+------------+-------------------+ 166 * 167 * where 
'n' stands for number of varying inputs expressed as vec4s. 168 * 169 * The URB size is in turn expressed in 64 bytes (512 bits). 170 */ 171static inline unsigned 172gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params) 173{ 174 const unsigned num_varyings = 175 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; 176 const unsigned total_needed = 16 + 16 + num_varyings * 16; 177 178 return DIV_ROUND_UP(total_needed, 64); 179} 180 181/* 3DSTATE_URB 182 * 3DSTATE_URB_VS 183 * 3DSTATE_URB_HS 184 * 3DSTATE_URB_DS 185 * 3DSTATE_URB_GS 186 * 187 * Assign the entire URB to the VS. Even though the VS disabled, URB space 188 * is still needed because the clipper loads the VUE's from the URB. From 189 * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE, 190 * Dword 1.15:0 "VS Number of URB Entries": 191 * This field is always used (even if VS Function Enable is DISABLED). 192 * 193 * The warning below appears in the PRM (Section 3DSTATE_URB), but we can 194 * safely ignore it because this batch contains only one draw call. 195 * Because of URB corruption caused by allocating a previous GS unit 196 * URB entry to the VS unit, software is required to send a “GS NULL 197 * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0) 198 * plus a dummy DRAW call before any case where VS will be taking over 199 * GS URB space. 200 * 201 * If the 3DSTATE_URB_VS is emitted, than the others must be also. 202 * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS: 203 * 204 * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be 205 * programmed in order for the programming of this state to be 206 * valid. 
207 */ 208static void 209emit_urb_config(struct brw_context *brw, 210 const struct brw_blorp_params *params) 211{ 212 blorp_emit_urb_config(brw, gen7_blorp_get_vs_entry_size(params)); 213} 214 215static void 216blorp_emit_vertex_data(struct brw_context *brw, 217 const struct brw_blorp_params *params, 218 struct blorp_address *addr, 219 uint32_t *size) 220{ 221 const float vertices[] = { 222 /* v0 */ (float)params->x0, (float)params->y1, 223 /* v1 */ (float)params->x1, (float)params->y1, 224 /* v2 */ (float)params->x0, (float)params->y0, 225 }; 226 227 void *data = blorp_alloc_vertex_buffer(&brw->blorp, sizeof(vertices), addr); 228 memcpy(data, vertices, sizeof(vertices)); 229 *size = sizeof(vertices); 230} 231 232static void 233blorp_emit_input_varying_data(struct brw_context *brw, 234 const struct brw_blorp_params *params, 235 struct blorp_address *addr, 236 uint32_t *size) 237{ 238 const unsigned vec4_size_in_bytes = 4 * sizeof(float); 239 const unsigned max_num_varyings = 240 DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes); 241 const unsigned num_varyings = params->wm_prog_data->num_varying_inputs; 242 243 *size = num_varyings * vec4_size_in_bytes; 244 245 const float *const inputs_src = (const float *)¶ms->wm_inputs; 246 float *inputs = blorp_alloc_vertex_buffer(&brw->blorp, *size, addr); 247 248 /* Walk over the attribute slots, determine if the attribute is used by 249 * the program and when necessary copy the values from the input storage to 250 * the vertex data buffer. 
251 */ 252 for (unsigned i = 0; i < max_num_varyings; i++) { 253 const gl_varying_slot attr = VARYING_SLOT_VAR0 + i; 254 255 if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr))) 256 continue; 257 258 memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes); 259 260 inputs += 4; 261 } 262} 263 264static void 265blorp_emit_vertex_buffers(struct brw_context *brw, 266 const struct brw_blorp_params *params) 267{ 268 struct GENX(VERTEX_BUFFER_STATE) vb[2]; 269 memset(vb, 0, sizeof(vb)); 270 271 unsigned num_buffers = 1; 272 273 uint32_t size; 274 blorp_emit_vertex_data(brw, params, &vb[0].BufferStartingAddress, &size); 275 vb[0].VertexBufferIndex = 0; 276 vb[0].BufferPitch = 2 * sizeof(float); 277 vb[0].VertexBufferMOCS = brw->blorp.mocs.vb; 278#if GEN_GEN >= 7 279 vb[0].AddressModifyEnable = true; 280#endif 281#if GEN_GEN >= 8 282 vb[0].BufferSize = size; 283#else 284 vb[0].BufferAccessType = VERTEXDATA; 285 vb[0].EndAddress = vb[0].BufferStartingAddress; 286 vb[0].EndAddress.offset += size - 1; 287#endif 288 289 if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) { 290 blorp_emit_input_varying_data(brw, params, 291 &vb[1].BufferStartingAddress, &size); 292 vb[1].VertexBufferIndex = 1; 293 vb[1].BufferPitch = 0; 294 vb[1].VertexBufferMOCS = brw->blorp.mocs.vb; 295#if GEN_GEN >= 7 296 vb[1].AddressModifyEnable = true; 297#endif 298#if GEN_GEN >= 8 299 vb[1].BufferSize = size; 300#else 301 vb[1].BufferAccessType = INSTANCEDATA; 302 vb[1].EndAddress = vb[1].BufferStartingAddress; 303 vb[1].EndAddress.offset += size - 1; 304#endif 305 num_buffers++; 306 } 307 308 const unsigned num_dwords = 309 1 + GENX(VERTEX_BUFFER_STATE_length) * num_buffers; 310 uint32_t *dw = blorp_emitn(brw, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords); 311 312 for (unsigned i = 0; i < num_buffers; i++) { 313 GENX(VERTEX_BUFFER_STATE_pack)(brw, dw, &vb[i]); 314 dw += GENX(VERTEX_BUFFER_STATE_length); 315 } 316} 317 318static void 319blorp_emit_vertex_elements(struct 
brw_context *brw, 320 const struct brw_blorp_params *params) 321{ 322 const unsigned num_varyings = 323 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; 324 const unsigned num_elements = 2 + num_varyings; 325 326 struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements]; 327 memset(ve, 0, num_elements * sizeof(*ve)); 328 329 /* Setup VBO for the rectangle primitive.. 330 * 331 * A rectangle primitive (3DPRIM_RECTLIST) consists of only three 332 * vertices. The vertices reside in screen space with DirectX 333 * coordinates (that is, (0, 0) is the upper left corner). 334 * 335 * v2 ------ implied 336 * | | 337 * | | 338 * v0 ----- v1 339 * 340 * Since the VS is disabled, the clipper loads each VUE directly from 341 * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and 342 * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows: 343 * dw0: Reserved, MBZ. 344 * dw1: Render Target Array Index. The HiZ op does not use indexed 345 * vertices, so set the dword to 0. 346 * dw2: Viewport Index. The HiZ op disables viewport mapping and 347 * scissoring, so set the dword to 0. 348 * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, 349 * so set the dword to 0. 350 * dw4: Vertex Position X. 351 * dw5: Vertex Position Y. 352 * dw6: Vertex Position Z. 353 * dw7: Vertex Position W. 354 * 355 * dw8: Flat vertex input 0 356 * dw9: Flat vertex input 1 357 * ... 358 * dwn: Flat vertex input n - 8 359 * 360 * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1 361 * "Vertex URB Entry (VUE) Formats". 362 * 363 * Only vertex position X and Y are going to be variable, Z is fixed to 364 * zero and W to one. Header words dw0-3 are all zero. There is no need to 365 * include the fixed values in the vertex buffer. Vertex fetcher can be 366 * instructed to fill vertex elements with constant values of one and zero 367 * instead of reading them from the buffer. 
368 * Flat inputs are program constants that are not interpolated. Moreover 369 * their values will be the same between vertices. 370 * 371 * See the vertex element setup below. 372 */ 373 ve[0].VertexBufferIndex = 0; 374 ve[0].Valid = true; 375 ve[0].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT; 376 ve[0].SourceElementOffset = 0; 377 ve[0].Component0Control = VFCOMP_STORE_0; 378 ve[0].Component1Control = VFCOMP_STORE_0; 379 ve[0].Component2Control = VFCOMP_STORE_0; 380 ve[0].Component3Control = VFCOMP_STORE_0; 381 382 ve[1].VertexBufferIndex = 0; 383 ve[1].Valid = true; 384 ve[1].SourceElementFormat = ISL_FORMAT_R32G32_FLOAT; 385 ve[1].SourceElementOffset = 0; 386 ve[1].Component0Control = VFCOMP_STORE_SRC; 387 ve[1].Component1Control = VFCOMP_STORE_SRC; 388 ve[1].Component2Control = VFCOMP_STORE_0; 389 ve[1].Component3Control = VFCOMP_STORE_1_FP; 390 391 for (unsigned i = 0; i < num_varyings; ++i) { 392 ve[i + 2].VertexBufferIndex = 1; 393 ve[i + 2].Valid = true; 394 ve[i + 2].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT; 395 ve[i + 2].SourceElementOffset = i * 4 * sizeof(float); 396 ve[i + 2].Component0Control = VFCOMP_STORE_SRC; 397 ve[i + 2].Component1Control = VFCOMP_STORE_SRC; 398 ve[i + 2].Component2Control = VFCOMP_STORE_SRC; 399 ve[i + 2].Component3Control = VFCOMP_STORE_SRC; 400 } 401 402 const unsigned num_dwords = 403 1 + GENX(VERTEX_ELEMENT_STATE_length) * num_elements; 404 uint32_t *dw = blorp_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS), num_dwords); 405 406 for (unsigned i = 0; i < num_elements; i++) { 407 GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &ve[i]); 408 dw += GENX(VERTEX_ELEMENT_STATE_length); 409 } 410 411#if GEN_GEN >= 8 412 blorp_emit(brw, GENX(3DSTATE_VF_SGVS), sgvs); 413 414 for (unsigned i = 0; i < num_elements; i++) { 415 blorp_emit(brw, GENX(3DSTATE_VF_INSTANCING), vf) { 416 vf.VertexElementIndex = i; 417 vf.InstancingEnable = false; 418 } 419 } 420 421 blorp_emit(brw, GENX(3DSTATE_VF_TOPOLOGY), topo) { 422 
topo.PrimitiveTopologyType = _3DPRIM_RECTLIST; 423 } 424#endif 425} 426 427static void 428blorp_emit_sf_config(struct brw_context *brw, 429 const struct brw_blorp_params *params) 430{ 431 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; 432 433 /* 3DSTATE_SF 434 * 435 * Disable ViewportTransformEnable (dw2.1) 436 * 437 * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D 438 * Primitives Overview": 439 * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the 440 * use of screen- space coordinates). 441 * 442 * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3) 443 * and BackFaceFillMode (dw2.5:6) to SOLID(0). 444 * 445 * From the Sandy Bridge PRM, Volume 2, Part 1, Section 446 * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: 447 * SOLID: Any triangle or rectangle object found to be front-facing 448 * is rendered as a solid object. This setting is required when 449 * (rendering rectangle (RECTLIST) objects. 450 */ 451 452#if GEN_GEN >= 8 453 454 blorp_emit(brw, GENX(3DSTATE_SF), sf); 455 456 blorp_emit(brw, GENX(3DSTATE_RASTER), raster) { 457 raster.CullMode = CULLMODE_NONE; 458 } 459 460 blorp_emit(brw, GENX(3DSTATE_SBE), sbe) { 461 sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; 462 sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs; 463 sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); 464 sbe.ForceVertexURBEntryReadLength = true; 465 sbe.ForceVertexURBEntryReadOffset = true; 466 sbe.ConstantInterpolationEnable = prog_data->flat_inputs; 467 468#if GEN_GEN >= 9 469 for (unsigned i = 0; i < 32; i++) 470 sbe.AttributeActiveComponentFormat[i] = ACF_XYZW; 471#endif 472 } 473 474#elif GEN_GEN >= 7 475 476 blorp_emit(brw, GENX(3DSTATE_SF), sf) { 477 sf.FrontFaceFillMode = FILL_MODE_SOLID; 478 sf.BackFaceFillMode = FILL_MODE_SOLID; 479 480 sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ? 
481 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL; 482 483#if GEN_GEN == 7 484 sf.DepthBufferSurfaceFormat = params->depth_format; 485#endif 486 } 487 488 blorp_emit(brw, GENX(3DSTATE_SBE), sbe) { 489 sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; 490 if (prog_data) { 491 sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs; 492 sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); 493 sbe.ConstantInterpolationEnable = prog_data->flat_inputs; 494 } else { 495 sbe.NumberofSFOutputAttributes = 0; 496 sbe.VertexURBEntryReadLength = 1; 497 } 498 } 499 500#else /* GEN_GEN <= 6 */ 501 502 blorp_emit(brw, GENX(3DSTATE_SF), sf) { 503 sf.FrontFaceFillMode = FILL_MODE_SOLID; 504 sf.BackFaceFillMode = FILL_MODE_SOLID; 505 506 sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ? 507 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL; 508 509 sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; 510 if (prog_data) { 511 sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs; 512 sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); 513 sf.ConstantInterpolationEnable = prog_data->flat_inputs; 514 } else { 515 sf.NumberofSFOutputAttributes = 0; 516 sf.VertexURBEntryReadLength = 1; 517 } 518 } 519 520#endif /* GEN_GEN */ 521} 522 523static void 524blorp_emit_ps_config(struct brw_context *brw, 525 const struct brw_blorp_params *params) 526{ 527 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; 528 529 /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be 530 * nonzero to prevent the GPU from hanging. While the documentation doesn't 531 * mention this explicitly, it notes that the valid range for the field is 532 * [1,39] = [2,40] threads, which excludes zero. 533 * 534 * To be safe (and to minimize extraneous code) we go ahead and fully 535 * configure the WM state whether or not there is a WM program. 
536 */ 537 538#if GEN_GEN >= 8 539 540 blorp_emit(brw, GENX(3DSTATE_WM), wm); 541 542 blorp_emit(brw, GENX(3DSTATE_PS), ps) { 543 if (params->src.addr.buffer) { 544 ps.SamplerCount = 1; /* Up to 4 samplers */ 545 ps.BindingTableEntryCount = 2; 546 } else { 547 ps.BindingTableEntryCount = 1; 548 } 549 550 ps.DispatchGRFStartRegisterForConstantSetupData0 = 551 prog_data->first_curbe_grf_0; 552 ps.DispatchGRFStartRegisterForConstantSetupData2 = 553 prog_data->first_curbe_grf_2; 554 555 ps._8PixelDispatchEnable = prog_data->dispatch_8; 556 ps._16PixelDispatchEnable = prog_data->dispatch_16; 557 558 ps.KernelStartPointer0 = params->wm_prog_kernel; 559 ps.KernelStartPointer2 = 560 params->wm_prog_kernel + prog_data->ksp_offset_2; 561 562 /* 3DSTATE_PS expects the number of threads per PSD, which is always 64; 563 * it implicitly scales for different GT levels (which have some # of 564 * PSDs). 565 * 566 * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1. 567 */ 568 if (GEN_GEN >= 9) 569 ps.MaximumNumberofThreadsPerPSD = 64 - 1; 570 else 571 ps.MaximumNumberofThreadsPerPSD = 64 - 2; 572 573 switch (params->fast_clear_op) { 574#if GEN_GEN >= 9 575 case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */ 576 ps.RenderTargetResolveType = RESOLVE_PARTIAL; 577 break; 578 case (3 << 6): /* GEN9_PS_RENDER_TARGET_RESOLVE_FULL */ 579 ps.RenderTargetResolveType = RESOLVE_FULL; 580 break; 581#else 582 case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */ 583 ps.RenderTargetResolveEnable = true; 584 break; 585#endif 586 case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */ 587 ps.RenderTargetFastClearEnable = true; 588 break; 589 } 590 } 591 592 blorp_emit(brw, GENX(3DSTATE_PS_EXTRA), psx) { 593 psx.PixelShaderValid = true; 594 595 if (params->src.addr.buffer) 596 psx.PixelShaderKillsPixel = true; 597 598 psx.AttributeEnable = prog_data->num_varying_inputs > 0; 599 600 if (prog_data && prog_data->persample_msaa_dispatch) 601 psx.PixelShaderIsPerSample = true; 602 } 
603 604#elif GEN_GEN >= 7 605 606 blorp_emit(brw, GENX(3DSTATE_WM), wm) { 607 switch (params->hiz_op) { 608 case GEN6_HIZ_OP_DEPTH_CLEAR: 609 wm.DepthBufferClear = true; 610 break; 611 case GEN6_HIZ_OP_DEPTH_RESOLVE: 612 wm.DepthBufferResolveEnable = true; 613 break; 614 case GEN6_HIZ_OP_HIZ_RESOLVE: 615 wm.HierarchicalDepthBufferResolveEnable = true; 616 break; 617 case GEN6_HIZ_OP_NONE: 618 break; 619 default: 620 unreachable("not reached"); 621 } 622 623 if (prog_data) 624 wm.ThreadDispatchEnable = true; 625 626 if (params->src.addr.buffer) 627 wm.PixelShaderKillPixel = true; 628 629 if (params->dst.surf.samples > 1) { 630 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; 631 wm.MultisampleDispatchMode = 632 (prog_data && prog_data->persample_msaa_dispatch) ? 633 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL; 634 } else { 635 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; 636 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; 637 } 638 } 639 640 blorp_emit(brw, GENX(3DSTATE_PS), ps) { 641 ps.MaximumNumberofThreads = brw->max_wm_threads - 1; 642 643#if GEN_IS_HASWELL 644 ps.SampleMask = 1; 645#endif 646 647 if (prog_data) { 648 ps.DispatchGRFStartRegisterforConstantSetupData0 = 649 prog_data->first_curbe_grf_0; 650 ps.DispatchGRFStartRegisterforConstantSetupData2 = 651 prog_data->first_curbe_grf_2; 652 653 ps.KernelStartPointer0 = params->wm_prog_kernel; 654 ps.KernelStartPointer2 = 655 params->wm_prog_kernel + prog_data->ksp_offset_2; 656 657 ps._8PixelDispatchEnable = prog_data->dispatch_8; 658 ps._16PixelDispatchEnable = prog_data->dispatch_16; 659 660 ps.AttributeEnable = prog_data->num_varying_inputs > 0; 661 } else { 662 /* Gen7 hardware gets angry if we don't enable at least one dispatch 663 * mode, so just enable 16-pixel dispatch if we don't have a program. 
664 */ 665 ps._16PixelDispatchEnable = true; 666 } 667 668 if (params->src.addr.buffer) 669 ps.SamplerCount = 1; /* Up to 4 samplers */ 670 671 switch (params->fast_clear_op) { 672 case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */ 673 ps.RenderTargetResolveEnable = true; 674 break; 675 case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */ 676 ps.RenderTargetFastClearEnable = true; 677 break; 678 } 679 } 680 681#else /* GEN_GEN <= 6 */ 682 683 blorp_emit(brw, GENX(3DSTATE_WM), wm) { 684 wm.MaximumNumberofThreads = brw->max_wm_threads - 1; 685 686 switch (params->hiz_op) { 687 case GEN6_HIZ_OP_DEPTH_CLEAR: 688 wm.DepthBufferClear = true; 689 break; 690 case GEN6_HIZ_OP_DEPTH_RESOLVE: 691 wm.DepthBufferResolveEnable = true; 692 break; 693 case GEN6_HIZ_OP_HIZ_RESOLVE: 694 wm.HierarchicalDepthBufferResolveEnable = true; 695 break; 696 case GEN6_HIZ_OP_NONE: 697 break; 698 default: 699 unreachable("not reached"); 700 } 701 702 if (prog_data) { 703 wm.ThreadDispatchEnable = true; 704 705 wm.DispatchGRFStartRegisterforConstantSetupData0 = 706 prog_data->first_curbe_grf_0; 707 wm.DispatchGRFStartRegisterforConstantSetupData2 = 708 prog_data->first_curbe_grf_2; 709 710 wm.KernelStartPointer0 = params->wm_prog_kernel; 711 wm.KernelStartPointer2 = 712 params->wm_prog_kernel + prog_data->ksp_offset_2; 713 714 wm._8PixelDispatchEnable = prog_data->dispatch_8; 715 wm._16PixelDispatchEnable = prog_data->dispatch_16; 716 717 wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs; 718 } 719 720 if (params->src.addr.buffer) { 721 wm.SamplerCount = 1; /* Up to 4 samplers */ 722 wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */ 723 } 724 725 if (params->dst.surf.samples > 1) { 726 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; 727 wm.MultisampleDispatchMode = 728 (prog_data && prog_data->persample_msaa_dispatch) ? 
729 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL; 730 } else { 731 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; 732 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; 733 } 734 } 735 736#endif /* GEN_GEN */ 737} 738 739 740static void 741blorp_emit_depth_stencil_config(struct brw_context *brw, 742 const struct brw_blorp_params *params) 743{ 744 brw_emit_depth_stall_flushes(brw); 745 746#if GEN_GEN >= 7 747 const uint32_t mocs = 1; /* GEN7_MOCS_L3 */ 748#else 749 const uint32_t mocs = 0; 750#endif 751 752 blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) { 753 switch (params->depth.surf.dim) { 754 case ISL_SURF_DIM_1D: 755 db.SurfaceType = SURFTYPE_1D; 756 break; 757 case ISL_SURF_DIM_2D: 758 db.SurfaceType = SURFTYPE_2D; 759 break; 760 case ISL_SURF_DIM_3D: 761 db.SurfaceType = SURFTYPE_3D; 762 break; 763 } 764 765 db.SurfaceFormat = params->depth_format; 766 767#if GEN_GEN >= 7 768 db.DepthWriteEnable = true; 769#endif 770 771#if GEN_GEN <= 6 772 db.TiledSurface = true; 773 db.TileWalk = TILEWALK_YMAJOR; 774 db.MIPMapLayoutMode = MIPLAYOUT_BELOW; 775 db.SeparateStencilBufferEnable = true; 776#endif 777 778 db.HierarchicalDepthBufferEnable = true; 779 780 db.Width = params->depth.surf.logical_level0_px.width - 1; 781 db.Height = params->depth.surf.logical_level0_px.height - 1; 782 db.RenderTargetViewExtent = db.Depth = 783 MAX2(params->depth.surf.logical_level0_px.depth, 784 params->depth.surf.logical_level0_px.array_len) - 1; 785 786 db.LOD = params->depth.view.base_level; 787 db.MinimumArrayElement = params->depth.view.base_array_layer; 788 789 db.SurfacePitch = params->depth.surf.row_pitch - 1; 790 db.SurfaceBaseAddress = params->depth.addr; 791 db.DepthBufferMOCS = mocs; 792 } 793 794 blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) { 795 hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1; 796 hiz.SurfaceBaseAddress = params->depth.aux_addr; 797 hiz.HierarchicalDepthBufferMOCS = mocs; 798 } 799 800 blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), 
sb); 801} 802 803static uint32_t 804blorp_emit_blend_state(struct brw_context *brw, 805 const struct brw_blorp_params *params) 806{ 807 struct GENX(BLEND_STATE) blend; 808 memset(&blend, 0, sizeof(blend)); 809 810 for (unsigned i = 0; i < params->num_draw_buffers; ++i) { 811 blend.Entry[i].PreBlendColorClampEnable = true; 812 blend.Entry[i].PostBlendColorClampEnable = true; 813 blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT; 814 815 blend.Entry[i].WriteDisableRed = params->color_write_disable[0]; 816 blend.Entry[i].WriteDisableGreen = params->color_write_disable[1]; 817 blend.Entry[i].WriteDisableBlue = params->color_write_disable[2]; 818 blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3]; 819 } 820 821 uint32_t offset; 822 void *state = blorp_alloc_dynamic_state(&brw->blorp, 823 AUB_TRACE_BLEND_STATE, 824 GENX(BLEND_STATE_length) * 4, 825 64, &offset); 826 GENX(BLEND_STATE_pack)(NULL, state, &blend); 827 828#if GEN_GEN >= 7 829 blorp_emit(brw, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) { 830 sp.BlendStatePointer = offset; 831#if GEN_GEN >= 8 832 sp.BlendStatePointerValid = true; 833#endif 834 } 835#endif 836 837#if GEN_GEN >= 8 838 blorp_emit(brw, GENX(3DSTATE_PS_BLEND), ps_blend) { 839 ps_blend.HasWriteableRT = true; 840 } 841#endif 842 843 return offset; 844} 845 846static uint32_t 847blorp_emit_color_calc_state(struct brw_context *brw, 848 const struct brw_blorp_params *params) 849{ 850 uint32_t offset; 851 void *state = blorp_alloc_dynamic_state(&brw->blorp, 852 AUB_TRACE_CC_STATE, 853 GENX(COLOR_CALC_STATE_length) * 4, 854 64, &offset); 855 memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4); 856 857#if GEN_GEN >= 7 858 blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), sp) { 859 sp.ColorCalcStatePointer = offset; 860#if GEN_GEN >= 8 861 sp.ColorCalcStatePointerValid = true; 862#endif 863 } 864#endif 865 866 return offset; 867} 868 869static uint32_t 870blorp_emit_depth_stencil_state(struct brw_context *brw, 871 const struct brw_blorp_params 
*params) 872{ 873#if GEN_GEN >= 8 874 875 /* On gen8+, DEPTH_STENCIL state is simply an instruction */ 876 blorp_emit(brw, GENX(3DSTATE_WM_DEPTH_STENCIL), ds); 877 return 0; 878 879#else /* GEN_GEN <= 7 */ 880 881 /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: 882 * - 7.5.3.1 Depth Buffer Clear 883 * - 7.5.3.2 Depth Buffer Resolve 884 * - 7.5.3.3 Hierarchical Depth Buffer Resolve 885 */ 886 struct GENX(DEPTH_STENCIL_STATE) ds = { 887 .DepthBufferWriteEnable = true, 888 }; 889 890 if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) { 891 ds.DepthTestEnable = true; 892 ds.DepthTestFunction = COMPAREFUNCTION_NEVER; 893 } 894 895 uint32_t offset; 896 void *state = blorp_alloc_dynamic_state(&brw->blorp, 897 AUB_TRACE_DEPTH_STENCIL_STATE, 898 GENX(DEPTH_STENCIL_STATE_length) * 4, 899 64, &offset); 900 GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds); 901 902#if GEN_GEN >= 7 903 blorp_emit(brw, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) { 904 sp.PointertoDEPTH_STENCIL_STATE = offset; 905 } 906#endif 907 908 return offset; 909 910#endif /* GEN_GEN */ 911} 912 913struct surface_state_info { 914 unsigned num_dwords; 915 unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in bytes */ 916 unsigned reloc_dw; 917 unsigned aux_reloc_dw; 918}; 919 920static const struct surface_state_info surface_state_infos[] = { 921 [6] = {6, 32, 1, 0}, 922 [7] = {8, 32, 1, 6}, 923 [8] = {13, 64, 8, 10}, 924 [9] = {16, 64, 8, 10}, 925}; 926 927static uint32_t 928blorp_emit_surface_state(struct brw_context *brw, 929 const struct brw_blorp_surface_info *surface, 930 bool is_render_target) 931{ 932 const struct surface_state_info ss_info = surface_state_infos[brw->gen]; 933 934 struct isl_surf surf = surface->surf; 935 936 if (surf.dim == ISL_SURF_DIM_1D && 937 surf.dim_layout == ISL_DIM_LAYOUT_GEN4_2D) { 938 assert(surf.logical_level0_px.height == 1); 939 surf.dim = ISL_SURF_DIM_2D; 940 } 941 942 /* Blorp doesn't support HiZ in any of the blit or slow-clear 
paths */ 943 enum isl_aux_usage aux_usage = surface->aux_usage; 944 if (aux_usage == ISL_AUX_USAGE_HIZ) 945 aux_usage = ISL_AUX_USAGE_NONE; 946 947 uint32_t surf_offset; 948 uint32_t *dw = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 949 ss_info.num_dwords * 4, ss_info.ss_align, 950 &surf_offset); 951 952 const uint32_t mocs = 953 is_render_target ? brw->blorp.mocs.rb : brw->blorp.mocs.tex; 954 uint64_t aux_bo_offset = 955 surface->aux_addr.buffer ? surface->aux_addr.buffer->offset64 : 0; 956 957 isl_surf_fill_state(&brw->isl_dev, dw, .surf = &surf, .view = &surface->view, 958 .address = surface->addr.buffer->offset64 + surface->addr.offset, 959 .aux_surf = &surface->aux_surf, .aux_usage = aux_usage, 960 .aux_address = aux_bo_offset + surface->aux_addr.offset, 961 .mocs = mocs, .clear_color = surface->clear_color, 962 .x_offset_sa = surface->tile_x_sa, 963 .y_offset_sa = surface->tile_y_sa); 964 965 /* Emit relocation to surface contents */ 966 drm_intel_bo_emit_reloc(brw->batch.bo, 967 surf_offset + ss_info.reloc_dw * 4, 968 surface->addr.buffer, 969 dw[ss_info.reloc_dw] - surface->addr.buffer->offset64, 970 surface->addr.read_domains, 971 surface->addr.write_domain); 972 973 if (aux_usage != ISL_AUX_USAGE_NONE) { 974 /* On gen7 and prior, the bottom 12 bits of the MCS base address are 975 * used to store other information. This should be ok, however, because 976 * surface buffer addresses are always 4K page alinged. 
977 */ 978 assert((surface->aux_addr.offset & 0xfff) == 0); 979 drm_intel_bo_emit_reloc(brw->batch.bo, 980 surf_offset + ss_info.aux_reloc_dw * 4, 981 surface->aux_addr.buffer, 982 dw[ss_info.aux_reloc_dw] & 0xfff, 983 surface->aux_addr.read_domains, 984 surface->aux_addr.write_domain); 985 } 986 987 return surf_offset; 988} 989 990static void 991blorp_emit_surface_states(struct brw_context *brw, 992 const struct brw_blorp_params *params) 993{ 994 uint32_t bind_offset; 995 uint32_t *bind = 996 brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, 997 sizeof(uint32_t) * BRW_BLORP_NUM_BINDING_TABLE_ENTRIES, 998 32, /* alignment */ &bind_offset); 999 1000 bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] = 1001 blorp_emit_surface_state(brw, ¶ms->dst, true); 1002 if (params->src.addr.buffer) { 1003 bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] = 1004 blorp_emit_surface_state(brw, ¶ms->src, false); 1005 } 1006 1007#if GEN_GEN >= 7 1008 blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) { 1009 bt.PointertoPSBindingTable = bind_offset; 1010 } 1011#else 1012 blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) { 1013 bt.PSBindingTableChange = true; 1014 bt.PointertoPSBindingTable = bind_offset; 1015 } 1016#endif 1017} 1018 1019static void 1020blorp_emit_sampler_state(struct brw_context *brw, 1021 const struct brw_blorp_params *params) 1022{ 1023 struct GENX(SAMPLER_STATE) sampler = { 1024 .MipModeFilter = MIPFILTER_NONE, 1025 .MagModeFilter = MAPFILTER_LINEAR, 1026 .MinModeFilter = MAPFILTER_LINEAR, 1027 .MinLOD = 0, 1028 .MaxLOD = 0, 1029 .TCXAddressControlMode = TCM_CLAMP, 1030 .TCYAddressControlMode = TCM_CLAMP, 1031 .TCZAddressControlMode = TCM_CLAMP, 1032 .MaximumAnisotropy = RATIO21, 1033 .RAddressMinFilterRoundingEnable = true, 1034 .RAddressMagFilterRoundingEnable = true, 1035 .VAddressMinFilterRoundingEnable = true, 1036 .VAddressMagFilterRoundingEnable = true, 1037 .UAddressMinFilterRoundingEnable = true, 1038 .UAddressMagFilterRoundingEnable = true, 1039 
.NonnormalizedCoordinateEnable = true, 1040 }; 1041 1042 uint32_t offset; 1043 void *state = blorp_alloc_dynamic_state(&brw->blorp, 1044 AUB_TRACE_SAMPLER_STATE, 1045 GENX(SAMPLER_STATE_length) * 4, 1046 32, &offset); 1047 GENX(SAMPLER_STATE_pack)(NULL, state, &sampler); 1048 1049#if GEN_GEN >= 7 1050 blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) { 1051 ssp.PointertoPSSamplerState = offset; 1052 } 1053#else 1054 blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) { 1055 ssp.VSSamplerStateChange = true; 1056 ssp.GSSamplerStateChange = true; 1057 ssp.PSSamplerStateChange = true; 1058 ssp.PointertoPSSamplerState = offset; 1059 } 1060#endif 1061} 1062 1063/* 3DSTATE_VIEWPORT_STATE_POINTERS */ 1064static void 1065blorp_emit_viewport_state(struct brw_context *brw, 1066 const struct brw_blorp_params *params) 1067{ 1068 uint32_t cc_vp_offset; 1069 1070 void *state = blorp_alloc_dynamic_state(&brw->blorp, 1071 AUB_TRACE_CC_VP_STATE, 1072 GENX(CC_VIEWPORT_length) * 4, 32, 1073 &cc_vp_offset); 1074 1075 GENX(CC_VIEWPORT_pack)(brw, state, 1076 &(struct GENX(CC_VIEWPORT)) { 1077 .MinimumDepth = 0.0, 1078 .MaximumDepth = 1.0, 1079 }); 1080 1081#if GEN_GEN >= 7 1082 blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) { 1083 vsp.CCViewportPointer = cc_vp_offset; 1084 } 1085#else 1086 blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) { 1087 vsp.CCViewportStateChange = true; 1088 vsp.PointertoCC_VIEWPORT = cc_vp_offset; 1089 } 1090#endif 1091} 1092 1093 1094/** 1095 * \brief Execute a blit or render pass operation. 1096 * 1097 * To execute the operation, this function manually constructs and emits a 1098 * batch to draw a rectangle primitive. The batchbuffer is flushed before 1099 * constructing and after emitting the batch. 1100 * 1101 * This function alters no GL state. 
1102 */ 1103void 1104genX(blorp_exec)(struct brw_context *brw, 1105 const struct brw_blorp_params *params) 1106{ 1107 uint32_t blend_state_offset = 0; 1108 uint32_t color_calc_state_offset = 0; 1109 uint32_t depth_stencil_state_offset; 1110 1111#if GEN_GEN == 6 1112 /* Emit workaround flushes when we switch from drawing to blorping. */ 1113 brw_emit_post_sync_nonzero_flush(brw); 1114#endif 1115 1116 brw_upload_state_base_address(brw); 1117 1118#if GEN_GEN >= 8 1119 gen7_l3_state.emit(brw); 1120#endif 1121 1122 blorp_emit_vertex_buffers(brw, params); 1123 blorp_emit_vertex_elements(brw, params); 1124 1125 emit_urb_config(brw, params); 1126 1127 if (params->wm_prog_data) { 1128 blend_state_offset = blorp_emit_blend_state(brw, params); 1129 color_calc_state_offset = blorp_emit_color_calc_state(brw, params); 1130 } 1131 depth_stencil_state_offset = blorp_emit_depth_stencil_state(brw, params); 1132 1133#if GEN_GEN <= 6 1134 /* 3DSTATE_CC_STATE_POINTERS 1135 * 1136 * The pointer offsets are relative to 1137 * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. 1138 * 1139 * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE. 1140 * 1141 * The dynamic state emit helpers emit their own STATE_POINTERS packets on 1142 * gen7+. However, on gen6 and earlier, they're all lumpped together in 1143 * one CC_STATE_POINTERS packet so we have to emit that here. 
1144 */ 1145 blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), cc) { 1146 cc.BLEND_STATEChange = true; 1147 cc.COLOR_CALC_STATEChange = true; 1148 cc.DEPTH_STENCIL_STATEChange = true; 1149 cc.PointertoBLEND_STATE = blend_state_offset; 1150 cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset; 1151 cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset; 1152 } 1153#else 1154 (void)blend_state_offset; 1155 (void)color_calc_state_offset; 1156 (void)depth_stencil_state_offset; 1157#endif 1158 1159 blorp_emit(brw, GENX(3DSTATE_CONSTANT_VS), vs); 1160#if GEN_GEN >= 7 1161 blorp_emit(brw, GENX(3DSTATE_CONSTANT_HS), hs); 1162 blorp_emit(brw, GENX(3DSTATE_CONSTANT_DS), DS); 1163#endif 1164 blorp_emit(brw, GENX(3DSTATE_CONSTANT_GS), gs); 1165 blorp_emit(brw, GENX(3DSTATE_CONSTANT_PS), ps); 1166 1167 if (brw->use_resource_streamer) 1168 gen7_disable_hw_binding_tables(brw); 1169 1170 if (params->wm_prog_data) 1171 blorp_emit_surface_states(brw, params); 1172 1173 if (params->src.addr.buffer) 1174 blorp_emit_sampler_state(brw, params); 1175 1176 blorp_emit_3dstate_multisample(brw, params->dst.surf.samples); 1177 1178 blorp_emit(brw, GENX(3DSTATE_SAMPLE_MASK), mask) { 1179 mask.SampleMask = (1 << params->dst.surf.samples) - 1; 1180 } 1181 1182 /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State, 1183 * 3DSTATE_VS, Dword 5.0 "VS Function Enable": 1184 * 1185 * [DevSNB] A pipeline flush must be programmed prior to a 1186 * 3DSTATE_VS command that causes the VS Function Enable to 1187 * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL 1188 * command with CS stall bit set and a post sync operation. 1189 * 1190 * We've already done one at the start of the BLORP operation. 
1191 */ 1192 blorp_emit(brw, GENX(3DSTATE_VS), vs); 1193#if GEN_GEN >= 7 1194 blorp_emit(brw, GENX(3DSTATE_HS), hs); 1195 blorp_emit(brw, GENX(3DSTATE_TE), te); 1196 blorp_emit(brw, GENX(3DSTATE_DS), DS); 1197 blorp_emit(brw, GENX(3DSTATE_STREAMOUT), so); 1198#endif 1199 blorp_emit(brw, GENX(3DSTATE_GS), gs); 1200 1201 blorp_emit(brw, GENX(3DSTATE_CLIP), clip) { 1202 clip.PerspectiveDivideDisable = true; 1203 } 1204 1205 blorp_emit_sf_config(brw, params); 1206 blorp_emit_ps_config(brw, params); 1207 1208 blorp_emit_viewport_state(brw, params); 1209 1210 if (params->depth.addr.buffer) { 1211 blorp_emit_depth_stencil_config(brw, params); 1212 } else { 1213 brw_emit_depth_stall_flushes(brw); 1214 1215 blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) { 1216 db.SurfaceType = SURFTYPE_NULL; 1217 db.SurfaceFormat = D32_FLOAT; 1218 } 1219 blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz); 1220 blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb); 1221 } 1222 1223 /* 3DSTATE_CLEAR_PARAMS 1224 * 1225 * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS: 1226 * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE 1227 * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes. 1228 */ 1229 blorp_emit(brw, GENX(3DSTATE_CLEAR_PARAMS), clear) { 1230 clear.DepthClearValueValid = true; 1231 clear.DepthClearValue = params->depth.clear_color.u32[0]; 1232 } 1233 1234 blorp_emit(brw, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { 1235 rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1; 1236 rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1; 1237 } 1238 1239 blorp_emit(brw, GENX(3DPRIMITIVE), prim) { 1240 prim.VertexAccessType = SEQUENTIAL; 1241 prim.PrimitiveTopologyType = _3DPRIM_RECTLIST; 1242 prim.VertexCountPerInstance = 3; 1243 prim.InstanceCount = params->num_layers; 1244 } 1245} 1246