1/* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#ifndef BLORP_GENX_EXEC_H 25#define BLORP_GENX_EXEC_H 26 27#include "blorp_priv.h" 28#include "common/gen_device_info.h" 29#include "common/gen_sample_positions.h" 30#include "intel_aub.h" 31 32/** 33 * This file provides the blorp pipeline setup and execution functionality. 34 * It defines the following function: 35 * 36 * static void 37 * blorp_exec(struct blorp_context *blorp, void *batch_data, 38 * const struct blorp_params *params); 39 * 40 * It is the job of whoever includes this header to wrap this in something 41 * to get an externally visible symbol. 42 * 43 * In order for the blorp_exec function to work, the driver must provide 44 * implementations of the following static helper functions. 45 */ 46 47static void * 48blorp_emit_dwords(struct blorp_batch *batch, unsigned n); 49 50static uint64_t 51blorp_emit_reloc(struct blorp_batch *batch, 52 void *location, struct blorp_address address, uint32_t delta); 53 54static void * 55blorp_alloc_dynamic_state(struct blorp_batch *batch, 56 enum aub_state_struct_type type, 57 uint32_t size, 58 uint32_t alignment, 59 uint32_t *offset); 60static void * 61blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size, 62 struct blorp_address *addr); 63 64static void 65blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries, 66 unsigned state_size, unsigned state_alignment, 67 uint32_t *bt_offset, uint32_t *surface_offsets, 68 void **surface_maps); 69 70static void 71blorp_flush_range(struct blorp_batch *batch, void *start, size_t size); 72 73static void 74blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, 75 struct blorp_address address, uint32_t delta); 76 77static void 78blorp_emit_urb_config(struct blorp_batch *batch, unsigned vs_entry_size); 79 80/***** BEGIN blorp_exec implementation ******/ 81 82#include "genxml/gen_macros.h" 83 84static uint64_t 85_blorp_combine_address(struct blorp_batch *batch, void *location, 86 struct blorp_address address, uint32_t delta) 87{ 88 if (address.buffer == NULL) { 89 return address.offset + delta; 90 } else { 91 return blorp_emit_reloc(batch, location, address, delta); 92 } 93} 94 95#define __gen_address_type struct blorp_address 96#define __gen_user_data struct blorp_batch 97#define __gen_combine_address _blorp_combine_address 98 99#include "genxml/genX_pack.h" 100 101#define _blorp_cmd_length(cmd) cmd ## _length 102#define _blorp_cmd_length_bias(cmd) cmd ## _length_bias 103#define _blorp_cmd_header(cmd) cmd ## _header 104#define _blorp_cmd_pack(cmd) cmd ## _pack 105 106#define blorp_emit(batch, cmd, name) \ 107 for (struct cmd name = { _blorp_cmd_header(cmd) }, \ 108 *_dst = blorp_emit_dwords(batch, _blorp_cmd_length(cmd)); \ 109 __builtin_expect(_dst != NULL, 1); \ 110 _blorp_cmd_pack(cmd)(batch, (void *)_dst, &name), \ 111 _dst = NULL) 112 113#define blorp_emitn(batch, cmd, n) ({ \ 114 uint32_t *_dw = blorp_emit_dwords(batch, n); \ 115 struct cmd template = { \ 116 _blorp_cmd_header(cmd), \ 117 .DWordLength = n - _blorp_cmd_length_bias(cmd), \ 118 }; \ 119 _blorp_cmd_pack(cmd)(batch, _dw, &template); \ 120 _dw + 1; /* Array starts at dw[1] */ \ 121 }) 122 123/* 3DSTATE_URB 124 * 3DSTATE_URB_VS 125 * 3DSTATE_URB_HS 126 * 3DSTATE_URB_DS 127 * 3DSTATE_URB_GS 128 * 129 * Assign the entire URB to the VS. Even though the VS disabled, URB space 130 * is still needed because the clipper loads the VUE's from the URB. From 131 * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE, 132 * Dword 1.15:0 "VS Number of URB Entries": 133 * This field is always used (even if VS Function Enable is DISABLED). 134 * 135 * The warning below appears in the PRM (Section 3DSTATE_URB), but we can 136 * safely ignore it because this batch contains only one draw call. 137 * Because of URB corruption caused by allocating a previous GS unit 138 * URB entry to the VS unit, software is required to send a “GS NULL 139 * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0) 140 * plus a dummy DRAW call before any case where VS will be taking over 141 * GS URB space. 142 * 143 * If the 3DSTATE_URB_VS is emitted, than the others must be also. 144 * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS: 145 * 146 * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be 147 * programmed in order for the programming of this state to be 148 * valid. 149 */ 150static void 151emit_urb_config(struct blorp_batch *batch, 152 const struct blorp_params *params) 153{ 154 /* Once vertex fetcher has written full VUE entries with complete 155 * header the space requirement is as follows per vertex (in bytes): 156 * 157 * Header Position Program constants 158 * +--------+------------+-------------------+ 159 * | 16 | 16 | n x 16 | 160 * +--------+------------+-------------------+ 161 * 162 * where 'n' stands for number of varying inputs expressed as vec4s. 163 */ 164 const unsigned num_varyings = 165 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; 166 const unsigned total_needed = 16 + 16 + num_varyings * 16; 167 168 /* The URB size is expressed in units of 64 bytes (512 bits) */ 169 const unsigned vs_entry_size = DIV_ROUND_UP(total_needed, 64); 170 171 blorp_emit_urb_config(batch, vs_entry_size); 172} 173 174static void 175blorp_emit_vertex_data(struct blorp_batch *batch, 176 const struct blorp_params *params, 177 struct blorp_address *addr, 178 uint32_t *size) 179{ 180 const float vertices[] = { 181 /* v0 */ (float)params->x1, (float)params->y1, params->z, 182 /* v1 */ (float)params->x0, (float)params->y1, params->z, 183 /* v2 */ (float)params->x0, (float)params->y0, params->z, 184 }; 185 186 void *data = blorp_alloc_vertex_buffer(batch, sizeof(vertices), addr); 187 memcpy(data, vertices, sizeof(vertices)); 188 *size = sizeof(vertices); 189 blorp_flush_range(batch, data, *size); 190} 191 192static void 193blorp_emit_input_varying_data(struct blorp_batch *batch, 194 const struct blorp_params *params, 195 struct blorp_address *addr, 196 uint32_t *size) 197{ 198 const unsigned vec4_size_in_bytes = 4 * sizeof(float); 199 const unsigned max_num_varyings = 200 DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes); 201 const unsigned num_varyings = 202 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; 203 204 *size = 16 + num_varyings * vec4_size_in_bytes; 205 206 const uint32_t *const inputs_src = (const uint32_t *)¶ms->wm_inputs; 207 void *data = blorp_alloc_vertex_buffer(batch, *size, addr); 208 uint32_t *inputs = data; 209 210 /* Copy in the VS inputs */ 211 assert(sizeof(params->vs_inputs) == 16); 212 memcpy(inputs, ¶ms->vs_inputs, sizeof(params->vs_inputs)); 213 inputs += 4; 214 215 if (params->wm_prog_data) { 216 /* Walk over the attribute slots, determine if the attribute is used by 217 * the program and when necessary copy the values from the input storage 218 * to the vertex data buffer. 219 */ 220 for (unsigned i = 0; i < max_num_varyings; i++) { 221 const gl_varying_slot attr = VARYING_SLOT_VAR0 + i; 222 223 const int input_index = params->wm_prog_data->urb_setup[attr]; 224 if (input_index < 0) 225 continue; 226 227 memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes); 228 229 inputs += 4; 230 } 231 } 232 233 blorp_flush_range(batch, data, *size); 234} 235 236static void 237blorp_emit_vertex_buffers(struct blorp_batch *batch, 238 const struct blorp_params *params) 239{ 240 struct GENX(VERTEX_BUFFER_STATE) vb[2]; 241 memset(vb, 0, sizeof(vb)); 242 243 uint32_t size; 244 blorp_emit_vertex_data(batch, params, &vb[0].BufferStartingAddress, &size); 245 vb[0].VertexBufferIndex = 0; 246 vb[0].BufferPitch = 3 * sizeof(float); 247 vb[0].VertexBufferMOCS = batch->blorp->mocs.vb; 248#if GEN_GEN >= 7 249 vb[0].AddressModifyEnable = true; 250#endif 251#if GEN_GEN >= 8 252 vb[0].BufferSize = size; 253#else 254 vb[0].BufferAccessType = VERTEXDATA; 255 vb[0].EndAddress = vb[0].BufferStartingAddress; 256 vb[0].EndAddress.offset += size - 1; 257#endif 258 259 blorp_emit_input_varying_data(batch, params, 260 &vb[1].BufferStartingAddress, &size); 261 vb[1].VertexBufferIndex = 1; 262 vb[1].BufferPitch = 0; 263 vb[1].VertexBufferMOCS = batch->blorp->mocs.vb; 264#if GEN_GEN >= 7 265 vb[1].AddressModifyEnable = true; 266#endif 267#if GEN_GEN >= 8 268 vb[1].BufferSize = size; 269#else 270 vb[1].BufferAccessType = INSTANCEDATA; 271 vb[1].EndAddress = vb[1].BufferStartingAddress; 272 vb[1].EndAddress.offset += size - 1; 273#endif 274 275 const unsigned num_dwords = 1 + GENX(VERTEX_BUFFER_STATE_length) * 2; 276 uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords); 277 278 for (unsigned i = 0; i < 2; i++) { 279 GENX(VERTEX_BUFFER_STATE_pack)(batch, dw, &vb[i]); 280 dw += GENX(VERTEX_BUFFER_STATE_length); 281 } 282} 283 284static void 285blorp_emit_vertex_elements(struct blorp_batch *batch, 286 const struct blorp_params *params) 287{ 288 const unsigned num_varyings = 289 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; 290 const unsigned num_elements = 2 + num_varyings; 291 292 struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements]; 293 memset(ve, 0, num_elements * sizeof(*ve)); 294 295 /* Setup VBO for the rectangle primitive.. 296 * 297 * A rectangle primitive (3DPRIM_RECTLIST) consists of only three 298 * vertices. The vertices reside in screen space with DirectX 299 * coordinates (that is, (0, 0) is the upper left corner). 300 * 301 * v2 ------ implied 302 * | | 303 * | | 304 * v1 ----- v0 305 * 306 * Since the VS is disabled, the clipper loads each VUE directly from 307 * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and 308 * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows: 309 * dw0: Reserved, MBZ. 310 * dw1: Render Target Array Index. Below vertex fetcher gets programmed 311 * to assign this with primitive instance identifier which will be 312 * used for layered clears. All other renders have only one instance 313 * and therefore the value will be effectively zero. 314 * dw2: Viewport Index. The HiZ op disables viewport mapping and 315 * scissoring, so set the dword to 0. 316 * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, 317 * so set the dword to 0. 318 * dw4: Vertex Position X. 319 * dw5: Vertex Position Y. 320 * dw6: Vertex Position Z. 321 * dw7: Vertex Position W. 322 * 323 * dw8: Flat vertex input 0 324 * dw9: Flat vertex input 1 325 * ... 326 * dwn: Flat vertex input n - 8 327 * 328 * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1 329 * "Vertex URB Entry (VUE) Formats". 330 * 331 * Only vertex position X and Y are going to be variable, Z is fixed to 332 * zero and W to one. Header words dw0,2,3 are zero. There is no need to 333 * include the fixed values in the vertex buffer. Vertex fetcher can be 334 * instructed to fill vertex elements with constant values of one and zero 335 * instead of reading them from the buffer. 336 * Flat inputs are program constants that are not interpolated. Moreover 337 * their values will be the same between vertices. 338 * 339 * See the vertex element setup below. 340 */ 341 ve[0].VertexBufferIndex = 1; 342 ve[0].Valid = true; 343 ve[0].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT; 344 ve[0].SourceElementOffset = 0; 345 ve[0].Component0Control = VFCOMP_STORE_SRC; 346 347 /* From Gen8 onwards hardware is no more instructed to overwrite components 348 * using an element specifier. Instead one has separate 3DSTATE_VF_SGVS 349 * (System Generated Value Setup) state packet for it. 350 */ 351#if GEN_GEN >= 8 352 ve[0].Component1Control = VFCOMP_STORE_0; 353#else 354 ve[0].Component1Control = VFCOMP_STORE_IID; 355#endif 356 ve[0].Component2Control = VFCOMP_STORE_SRC; 357 ve[0].Component3Control = VFCOMP_STORE_SRC; 358 359 ve[1].VertexBufferIndex = 0; 360 ve[1].Valid = true; 361 ve[1].SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT; 362 ve[1].SourceElementOffset = 0; 363 ve[1].Component0Control = VFCOMP_STORE_SRC; 364 ve[1].Component1Control = VFCOMP_STORE_SRC; 365 ve[1].Component2Control = VFCOMP_STORE_SRC; 366 ve[1].Component3Control = VFCOMP_STORE_1_FP; 367 368 for (unsigned i = 0; i < num_varyings; ++i) { 369 ve[i + 2].VertexBufferIndex = 1; 370 ve[i + 2].Valid = true; 371 ve[i + 2].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT; 372 ve[i + 2].SourceElementOffset = 16 + i * 4 * sizeof(float); 373 ve[i + 2].Component0Control = VFCOMP_STORE_SRC; 374 ve[i + 2].Component1Control = VFCOMP_STORE_SRC; 375 ve[i + 2].Component2Control = VFCOMP_STORE_SRC; 376 ve[i + 2].Component3Control = VFCOMP_STORE_SRC; 377 } 378 379 const unsigned num_dwords = 380 1 + GENX(VERTEX_ELEMENT_STATE_length) * num_elements; 381 uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_ELEMENTS), num_dwords); 382 383 for (unsigned i = 0; i < num_elements; i++) { 384 GENX(VERTEX_ELEMENT_STATE_pack)(batch, dw, &ve[i]); 385 dw += GENX(VERTEX_ELEMENT_STATE_length); 386 } 387 388#if GEN_GEN >= 8 389 /* Overwrite Render Target Array Index (2nd dword) in the VUE header with 390 * primitive instance identifier. This is used for layered clears. 391 */ 392 blorp_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs) { 393 sgvs.InstanceIDEnable = true; 394 sgvs.InstanceIDComponentNumber = COMP_1; 395 sgvs.InstanceIDElementOffset = 0; 396 } 397 398 for (unsigned i = 0; i < num_elements; i++) { 399 blorp_emit(batch, GENX(3DSTATE_VF_INSTANCING), vf) { 400 vf.VertexElementIndex = i; 401 vf.InstancingEnable = false; 402 } 403 } 404 405 blorp_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) { 406 topo.PrimitiveTopologyType = _3DPRIM_RECTLIST; 407 } 408#endif 409} 410 411static void 412blorp_emit_vs_config(struct blorp_batch *batch, 413 const struct blorp_params *params) 414{ 415 struct brw_vs_prog_data *vs_prog_data = params->vs_prog_data; 416 417 blorp_emit(batch, GENX(3DSTATE_VS), vs) { 418 if (vs_prog_data) { 419 vs.FunctionEnable = true; 420 421 vs.KernelStartPointer = params->vs_prog_kernel; 422 423 vs.DispatchGRFStartRegisterForURBData = 424 vs_prog_data->base.base.dispatch_grf_start_reg; 425 vs.VertexURBEntryReadLength = 426 vs_prog_data->base.urb_read_length; 427 vs.VertexURBEntryReadOffset = 0; 428 429 vs.MaximumNumberofThreads = 430 batch->blorp->isl_dev->info->max_vs_threads - 1; 431 432#if GEN_GEN >= 8 433 vs.SIMD8DispatchEnable = 434 vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8; 435#endif 436 } 437 } 438} 439 440static void 441blorp_emit_sf_config(struct blorp_batch *batch, 442 const struct blorp_params *params) 443{ 444 const struct brw_wm_prog_data *prog_data = params->wm_prog_data; 445 446 /* 3DSTATE_SF 447 * 448 * Disable ViewportTransformEnable (dw2.1) 449 * 450 * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D 451 * Primitives Overview": 452 * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the 453 * use of screen- space coordinates). 454 * 455 * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3) 456 * and BackFaceFillMode (dw2.5:6) to SOLID(0). 457 * 458 * From the Sandy Bridge PRM, Volume 2, Part 1, Section 459 * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: 460 * SOLID: Any triangle or rectangle object found to be front-facing 461 * is rendered as a solid object. This setting is required when 462 * (rendering rectangle (RECTLIST) objects. 463 */ 464 465#if GEN_GEN >= 8 466 467 blorp_emit(batch, GENX(3DSTATE_SF), sf); 468 469 blorp_emit(batch, GENX(3DSTATE_RASTER), raster) { 470 raster.CullMode = CULLMODE_NONE; 471 } 472 473 blorp_emit(batch, GENX(3DSTATE_SBE), sbe) { 474 sbe.VertexURBEntryReadOffset = 1; 475 if (prog_data) { 476 sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs; 477 sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); 478 sbe.ConstantInterpolationEnable = prog_data->flat_inputs; 479 } else { 480 sbe.NumberofSFOutputAttributes = 0; 481 sbe.VertexURBEntryReadLength = 1; 482 } 483 sbe.ForceVertexURBEntryReadLength = true; 484 sbe.ForceVertexURBEntryReadOffset = true; 485 486#if GEN_GEN >= 9 487 for (unsigned i = 0; i < 32; i++) 488 sbe.AttributeActiveComponentFormat[i] = ACF_XYZW; 489#endif 490 } 491 492#elif GEN_GEN >= 7 493 494 blorp_emit(batch, GENX(3DSTATE_SF), sf) { 495 sf.FrontFaceFillMode = FILL_MODE_SOLID; 496 sf.BackFaceFillMode = FILL_MODE_SOLID; 497 498 sf.MultisampleRasterizationMode = params->num_samples > 1 ? 499 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL; 500 501#if GEN_GEN == 7 502 sf.DepthBufferSurfaceFormat = params->depth_format; 503#endif 504 } 505 506 blorp_emit(batch, GENX(3DSTATE_SBE), sbe) { 507 sbe.VertexURBEntryReadOffset = 1; 508 if (prog_data) { 509 sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs; 510 sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); 511 sbe.ConstantInterpolationEnable = prog_data->flat_inputs; 512 } else { 513 sbe.NumberofSFOutputAttributes = 0; 514 sbe.VertexURBEntryReadLength = 1; 515 } 516 } 517 518#else /* GEN_GEN <= 6 */ 519 520 blorp_emit(batch, GENX(3DSTATE_SF), sf) { 521 sf.FrontFaceFillMode = FILL_MODE_SOLID; 522 sf.BackFaceFillMode = FILL_MODE_SOLID; 523 524 sf.MultisampleRasterizationMode = params->num_samples > 1 ? 525 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL; 526 527 sf.VertexURBEntryReadOffset = 1; 528 if (prog_data) { 529 sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs; 530 sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); 531 sf.ConstantInterpolationEnable = prog_data->flat_inputs; 532 } else { 533 sf.NumberofSFOutputAttributes = 0; 534 sf.VertexURBEntryReadLength = 1; 535 } 536 } 537 538#endif /* GEN_GEN */ 539} 540 541static void 542blorp_emit_ps_config(struct blorp_batch *batch, 543 const struct blorp_params *params) 544{ 545 const struct brw_wm_prog_data *prog_data = params->wm_prog_data; 546 547 /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be 548 * nonzero to prevent the GPU from hanging. While the documentation doesn't 549 * mention this explicitly, it notes that the valid range for the field is 550 * [1,39] = [2,40] threads, which excludes zero. 551 * 552 * To be safe (and to minimize extraneous code) we go ahead and fully 553 * configure the WM state whether or not there is a WM program. 554 */ 555 556#if GEN_GEN >= 8 557 558 blorp_emit(batch, GENX(3DSTATE_WM), wm); 559 560 blorp_emit(batch, GENX(3DSTATE_PS), ps) { 561 if (params->src.enabled) { 562 ps.SamplerCount = 1; /* Up to 4 samplers */ 563 ps.BindingTableEntryCount = 2; 564 } else { 565 ps.BindingTableEntryCount = 1; 566 } 567 568 if (prog_data) { 569 ps.DispatchGRFStartRegisterForConstantSetupData0 = 570 prog_data->base.dispatch_grf_start_reg; 571 ps.DispatchGRFStartRegisterForConstantSetupData2 = 572 prog_data->dispatch_grf_start_reg_2; 573 574 ps._8PixelDispatchEnable = prog_data->dispatch_8; 575 ps._16PixelDispatchEnable = prog_data->dispatch_16; 576 577 ps.KernelStartPointer0 = params->wm_prog_kernel; 578 ps.KernelStartPointer2 = 579 params->wm_prog_kernel + prog_data->prog_offset_2; 580 } 581 582 /* 3DSTATE_PS expects the number of threads per PSD, which is always 64; 583 * it implicitly scales for different GT levels (which have some # of 584 * PSDs). 585 * 586 * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1. 587 */ 588 if (GEN_GEN >= 9) 589 ps.MaximumNumberofThreadsPerPSD = 64 - 1; 590 else 591 ps.MaximumNumberofThreadsPerPSD = 64 - 2; 592 593 switch (params->fast_clear_op) { 594 case BLORP_FAST_CLEAR_OP_NONE: 595 break; 596#if GEN_GEN >= 9 597 case BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL: 598 ps.RenderTargetResolveType = RESOLVE_PARTIAL; 599 break; 600 case BLORP_FAST_CLEAR_OP_RESOLVE_FULL: 601 ps.RenderTargetResolveType = RESOLVE_FULL; 602 break; 603#else 604 case BLORP_FAST_CLEAR_OP_RESOLVE_FULL: 605 ps.RenderTargetResolveEnable = true; 606 break; 607#endif 608 case BLORP_FAST_CLEAR_OP_CLEAR: 609 ps.RenderTargetFastClearEnable = true; 610 break; 611 default: 612 unreachable("Invalid fast clear op"); 613 } 614 } 615 616 blorp_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) { 617 if (prog_data) { 618 psx.PixelShaderValid = true; 619 psx.AttributeEnable = prog_data->num_varying_inputs > 0; 620 psx.PixelShaderIsPerSample = prog_data->persample_dispatch; 621 } 622 623 if (params->src.enabled) 624 psx.PixelShaderKillsPixel = true; 625 } 626 627#elif GEN_GEN >= 7 628 629 blorp_emit(batch, GENX(3DSTATE_WM), wm) { 630 switch (params->hiz_op) { 631 case BLORP_HIZ_OP_DEPTH_CLEAR: 632 wm.DepthBufferClear = true; 633 break; 634 case BLORP_HIZ_OP_DEPTH_RESOLVE: 635 wm.DepthBufferResolveEnable = true; 636 break; 637 case BLORP_HIZ_OP_HIZ_RESOLVE: 638 wm.HierarchicalDepthBufferResolveEnable = true; 639 break; 640 case BLORP_HIZ_OP_NONE: 641 break; 642 default: 643 unreachable("not reached"); 644 } 645 646 if (prog_data) 647 wm.ThreadDispatchEnable = true; 648 649 if (params->src.enabled) 650 wm.PixelShaderKillsPixel = true; 651 652 if (params->num_samples > 1) { 653 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; 654 wm.MultisampleDispatchMode = 655 (prog_data && prog_data->persample_dispatch) ? 656 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL; 657 } else { 658 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; 659 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; 660 } 661 } 662 663 blorp_emit(batch, GENX(3DSTATE_PS), ps) { 664 ps.MaximumNumberofThreads = 665 batch->blorp->isl_dev->info->max_wm_threads - 1; 666 667#if GEN_IS_HASWELL 668 ps.SampleMask = 1; 669#endif 670 671 if (prog_data) { 672 ps.DispatchGRFStartRegisterForConstantSetupData0 = 673 prog_data->base.dispatch_grf_start_reg; 674 ps.DispatchGRFStartRegisterForConstantSetupData2 = 675 prog_data->dispatch_grf_start_reg_2; 676 677 ps.KernelStartPointer0 = params->wm_prog_kernel; 678 ps.KernelStartPointer2 = 679 params->wm_prog_kernel + prog_data->prog_offset_2; 680 681 ps._8PixelDispatchEnable = prog_data->dispatch_8; 682 ps._16PixelDispatchEnable = prog_data->dispatch_16; 683 684 ps.AttributeEnable = prog_data->num_varying_inputs > 0; 685 } else { 686 /* Gen7 hardware gets angry if we don't enable at least one dispatch 687 * mode, so just enable 16-pixel dispatch if we don't have a program. 688 */ 689 ps._16PixelDispatchEnable = true; 690 } 691 692 if (params->src.enabled) 693 ps.SamplerCount = 1; /* Up to 4 samplers */ 694 695 switch (params->fast_clear_op) { 696 case BLORP_FAST_CLEAR_OP_NONE: 697 break; 698 case BLORP_FAST_CLEAR_OP_RESOLVE_FULL: 699 ps.RenderTargetResolveEnable = true; 700 break; 701 case BLORP_FAST_CLEAR_OP_CLEAR: 702 ps.RenderTargetFastClearEnable = true; 703 break; 704 default: 705 unreachable("Invalid fast clear op"); 706 } 707 } 708 709#else /* GEN_GEN <= 6 */ 710 711 blorp_emit(batch, GENX(3DSTATE_WM), wm) { 712 wm.MaximumNumberofThreads = 713 batch->blorp->isl_dev->info->max_wm_threads - 1; 714 715 switch (params->hiz_op) { 716 case BLORP_HIZ_OP_DEPTH_CLEAR: 717 wm.DepthBufferClear = true; 718 break; 719 case BLORP_HIZ_OP_DEPTH_RESOLVE: 720 wm.DepthBufferResolveEnable = true; 721 break; 722 case BLORP_HIZ_OP_HIZ_RESOLVE: 723 wm.HierarchicalDepthBufferResolveEnable = true; 724 break; 725 case BLORP_HIZ_OP_NONE: 726 break; 727 default: 728 unreachable("not reached"); 729 } 730 731 if (prog_data) { 732 wm.ThreadDispatchEnable = true; 733 734 wm.DispatchGRFStartRegisterForConstantSetupData0 = 735 prog_data->base.dispatch_grf_start_reg; 736 wm.DispatchGRFStartRegisterForConstantSetupData2 = 737 prog_data->dispatch_grf_start_reg_2; 738 739 wm.KernelStartPointer0 = params->wm_prog_kernel; 740 wm.KernelStartPointer2 = 741 params->wm_prog_kernel + prog_data->prog_offset_2; 742 743 wm._8PixelDispatchEnable = prog_data->dispatch_8; 744 wm._16PixelDispatchEnable = prog_data->dispatch_16; 745 746 wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs; 747 } 748 749 if (params->src.enabled) { 750 wm.SamplerCount = 1; /* Up to 4 samplers */ 751 wm.PixelShaderKillsPixel = true; /* TODO: temporarily smash on */ 752 } 753 754 if (params->num_samples > 1) { 755 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; 756 wm.MultisampleDispatchMode = 757 (prog_data && prog_data->persample_dispatch) ? 758 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL; 759 } else { 760 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; 761 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; 762 } 763 } 764 765#endif /* GEN_GEN */ 766} 767 768static const uint32_t isl_to_gen_ds_surftype [] = { 769#if GEN_GEN >= 9 770 /* From the SKL PRM, "3DSTATE_DEPTH_STENCIL::SurfaceType": 771 * 772 * "If depth/stencil is enabled with 1D render target, depth/stencil 773 * surface type needs to be set to 2D surface type and height set to 1. 774 * Depth will use (legacy) TileY and stencil will use TileW. For this 775 * case only, the Surface Type of the depth buffer can be 2D while the 776 * Surface Type of the render target(s) are 1D, representing an 777 * exception to a programming note above. 778 */ 779 [ISL_SURF_DIM_1D] = SURFTYPE_2D, 780#else 781 [ISL_SURF_DIM_1D] = SURFTYPE_1D, 782#endif 783 [ISL_SURF_DIM_2D] = SURFTYPE_2D, 784 [ISL_SURF_DIM_3D] = SURFTYPE_3D, 785}; 786 787static void 788blorp_emit_depth_stencil_config(struct blorp_batch *batch, 789 const struct blorp_params *params) 790{ 791#if GEN_GEN >= 7 792 const uint32_t mocs = 1; /* GEN7_MOCS_L3 */ 793#else 794 const uint32_t mocs = 0; 795#endif 796 797 blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) { 798#if GEN_GEN >= 7 799 db.DepthWriteEnable = params->depth.enabled; 800 db.StencilWriteEnable = params->stencil.enabled; 801#endif 802 803#if GEN_GEN <= 6 804 db.SeparateStencilBufferEnable = true; 805#endif 806 807 if (params->depth.enabled) { 808 db.SurfaceFormat = params->depth_format; 809 db.SurfaceType = isl_to_gen_ds_surftype[params->depth.surf.dim]; 810 811#if GEN_GEN <= 6 812 db.TiledSurface = true; 813 db.TileWalk = TILEWALK_YMAJOR; 814 db.MIPMapLayoutMode = MIPLAYOUT_BELOW; 815#endif 816 817 db.HierarchicalDepthBufferEnable = 818 params->depth.aux_usage == ISL_AUX_USAGE_HIZ; 819 820 db.Width = params->depth.surf.logical_level0_px.width - 1; 821 db.Height = params->depth.surf.logical_level0_px.height - 1; 822 db.RenderTargetViewExtent = db.Depth = 823 params->depth.view.array_len - 1; 824 825 db.LOD = params->depth.view.base_level; 826 db.MinimumArrayElement = params->depth.view.base_array_layer; 827 828 db.SurfacePitch = params->depth.surf.row_pitch - 1; 829#if GEN_GEN >= 8 830 db.SurfaceQPitch = 831 isl_surf_get_array_pitch_el_rows(¶ms->depth.surf) >> 2, 832#endif 833 834 db.SurfaceBaseAddress = params->depth.addr; 835 db.DepthBufferMOCS = mocs; 836 } else if (params->stencil.enabled) { 837 db.SurfaceFormat = D32_FLOAT; 838 db.SurfaceType = isl_to_gen_ds_surftype[params->stencil.surf.dim]; 839 840 db.Width = params->stencil.surf.logical_level0_px.width - 1; 841 db.Height = params->stencil.surf.logical_level0_px.height - 1; 842 db.RenderTargetViewExtent = db.Depth = 843 params->stencil.view.array_len - 1; 844 845 db.LOD = params->stencil.view.base_level; 846 db.MinimumArrayElement = params->stencil.view.base_array_layer; 847 } else { 848 db.SurfaceType = SURFTYPE_NULL; 849 db.SurfaceFormat = D32_FLOAT; 850 } 851 } 852 853 blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) { 854 if (params->depth.aux_usage == ISL_AUX_USAGE_HIZ) { 855 hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1; 856 hiz.SurfaceBaseAddress = params->depth.aux_addr; 857 hiz.HierarchicalDepthBufferMOCS = mocs; 858#if GEN_GEN >= 8 859 hiz.SurfaceQPitch = 860 isl_surf_get_array_pitch_sa_rows(¶ms->depth.aux_surf) >> 2; 861#endif 862 } 863 } 864 865 blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb) { 866 if (params->stencil.enabled) { 867#if GEN_GEN >= 8 || GEN_IS_HASWELL 868 sb.StencilBufferEnable = true; 869#endif 870 871 sb.SurfacePitch = params->stencil.surf.row_pitch - 1, 872#if GEN_GEN >= 8 873 sb.SurfaceQPitch = 874 isl_surf_get_array_pitch_el_rows(¶ms->stencil.surf) >> 2, 875#endif 876 877 sb.SurfaceBaseAddress = params->stencil.addr; 878 sb.StencilBufferMOCS = batch->blorp->mocs.tex; 879 } 880 } 881 882 /* 3DSTATE_CLEAR_PARAMS 883 * 884 * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS: 885 * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE 886 * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes. 887 */ 888 blorp_emit(batch, GENX(3DSTATE_CLEAR_PARAMS), clear) { 889 clear.DepthClearValueValid = true; 890 clear.DepthClearValue = params->depth.clear_color.u32[0]; 891 } 892} 893 894static uint32_t 895blorp_emit_blend_state(struct blorp_batch *batch, 896 const struct blorp_params *params) 897{ 898 struct GENX(BLEND_STATE) blend; 899 memset(&blend, 0, sizeof(blend)); 900 901 for (unsigned i = 0; i < params->num_draw_buffers; ++i) { 902 blend.Entry[i].PreBlendColorClampEnable = true; 903 blend.Entry[i].PostBlendColorClampEnable = true; 904 blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT; 905 906 blend.Entry[i].WriteDisableRed = params->color_write_disable[0]; 907 blend.Entry[i].WriteDisableGreen = params->color_write_disable[1]; 908 blend.Entry[i].WriteDisableBlue = params->color_write_disable[2]; 909 blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3]; 910 } 911 912 uint32_t offset; 913 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_BLEND_STATE, 914 GENX(BLEND_STATE_length) * 4, 915 64, &offset); 916 GENX(BLEND_STATE_pack)(NULL, state, &blend); 917 blorp_flush_range(batch, state, GENX(BLEND_STATE_length) * 4); 918 919#if GEN_GEN >= 7 920 blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) { 921 sp.BlendStatePointer = offset; 922#if GEN_GEN >= 8 923 sp.BlendStatePointerValid = true; 924#endif 925 } 926#endif 927 928#if GEN_GEN >= 8 929 blorp_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) { 930 ps_blend.HasWriteableRT = true; 931 } 932#endif 933 934 return offset; 935} 936 937static uint32_t 938blorp_emit_color_calc_state(struct blorp_batch *batch, 939 const struct blorp_params *params) 940{ 941 struct GENX(COLOR_CALC_STATE) cc = { 0 }; 942 943#if GEN_GEN <= 8 944 cc.StencilReferenceValue = params->stencil_ref; 945#endif 946 947 uint32_t offset; 948 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_CC_STATE, 949 GENX(COLOR_CALC_STATE_length) * 4, 950 64, &offset); 951 GENX(COLOR_CALC_STATE_pack)(NULL, state, &cc); 952 blorp_flush_range(batch, state, GENX(COLOR_CALC_STATE_length) * 4); 953 954#if GEN_GEN >= 7 955 blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) { 956 sp.ColorCalcStatePointer = offset; 957#if GEN_GEN >= 8 958 sp.ColorCalcStatePointerValid = true; 959#endif 960 } 961#endif 962 963 return offset; 964} 965 966static uint32_t 967blorp_emit_depth_stencil_state(struct blorp_batch *batch, 968 const struct blorp_params *params) 969{ 970#if GEN_GEN >= 8 971 struct GENX(3DSTATE_WM_DEPTH_STENCIL) ds = { 972 GENX(3DSTATE_WM_DEPTH_STENCIL_header), 973 }; 974#else 975 struct GENX(DEPTH_STENCIL_STATE) ds = { 0 }; 976#endif 977 978 if (params->depth.enabled) { 979 ds.DepthBufferWriteEnable = true; 980 981 switch (params->hiz_op) { 982 case BLORP_HIZ_OP_NONE: 983 ds.DepthTestEnable = true; 984 ds.DepthTestFunction = COMPAREFUNCTION_ALWAYS; 985 break; 986 987 /* See the following sections of the Sandy Bridge PRM, Volume 2, Part1: 988 * - 7.5.3.1 Depth Buffer Clear 989 * - 7.5.3.2 Depth Buffer Resolve 990 * - 7.5.3.3 Hierarchical Depth Buffer Resolve 991 */ 992 case BLORP_HIZ_OP_DEPTH_RESOLVE: 993 ds.DepthTestEnable = true; 994 ds.DepthTestFunction = COMPAREFUNCTION_NEVER; 995 break; 996 997 case BLORP_HIZ_OP_DEPTH_CLEAR: 998 case BLORP_HIZ_OP_HIZ_RESOLVE: 999 ds.DepthTestEnable = false; 1000 break; 1001 } 1002 } 1003 1004 if (params->stencil.enabled) { 1005 ds.StencilBufferWriteEnable = true; 1006 ds.StencilTestEnable = true; 1007 ds.DoubleSidedStencilEnable = false; 1008 1009 ds.StencilTestFunction = COMPAREFUNCTION_ALWAYS; 1010 ds.StencilPassDepthPassOp = STENCILOP_REPLACE; 1011 1012 ds.StencilWriteMask = params->stencil_mask; 1013#if GEN_GEN >= 9 1014 ds.StencilReferenceValue = params->stencil_ref; 1015#endif 1016 } 1017 1018#if GEN_GEN >= 8 1019 uint32_t offset = 0; 1020 uint32_t *dw = blorp_emit_dwords(batch, 1021 GENX(3DSTATE_WM_DEPTH_STENCIL_length)); 1022 GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &ds); 1023#else 1024 uint32_t offset; 1025 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_DEPTH_STENCIL_STATE, 1026 GENX(DEPTH_STENCIL_STATE_length) * 4, 1027 64, &offset); 1028 GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds); 1029 blorp_flush_range(batch, state, GENX(DEPTH_STENCIL_STATE_length) * 4); 1030#endif 1031 1032#if GEN_GEN == 7 1033 blorp_emit(batch, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) { 1034 sp.PointertoDEPTH_STENCIL_STATE = offset; 1035 } 1036#endif 1037 1038 return offset; 1039} 1040 1041static void 1042blorp_emit_surface_state(struct blorp_batch *batch, 1043 const struct brw_blorp_surface_info *surface, 1044 void *state, uint32_t state_offset, 1045 bool is_render_target) 1046{ 1047 const struct isl_device *isl_dev = batch->blorp->isl_dev; 1048 struct isl_surf surf = surface->surf; 1049 1050 if (surf.dim == ISL_SURF_DIM_1D && 1051 surf.dim_layout == ISL_DIM_LAYOUT_GEN4_2D) { 1052 assert(surf.logical_level0_px.height == 1); 1053 surf.dim = ISL_SURF_DIM_2D; 1054 } 1055 1056 /* Blorp doesn't support HiZ in any of the blit or slow-clear paths */ 1057 enum isl_aux_usage aux_usage = surface->aux_usage; 1058 if (aux_usage == ISL_AUX_USAGE_HIZ) 1059 aux_usage = ISL_AUX_USAGE_NONE; 1060 1061 const uint32_t mocs = 1062 is_render_target ? batch->blorp->mocs.rb : batch->blorp->mocs.tex; 1063 1064 isl_surf_fill_state(batch->blorp->isl_dev, state, 1065 .surf = &surf, .view = &surface->view, 1066 .aux_surf = &surface->aux_surf, .aux_usage = aux_usage, 1067 .mocs = mocs, .clear_color = surface->clear_color); 1068 1069 blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset, 1070 surface->addr, 0); 1071 1072 if (aux_usage != ISL_AUX_USAGE_NONE) { 1073 /* On gen7 and prior, the bottom 12 bits of the MCS base address are 1074 * used to store other information. This should be ok, however, because 1075 * surface buffer addresses are always 4K page alinged. 1076 */ 1077 assert((surface->aux_addr.offset & 0xfff) == 0); 1078 uint32_t *aux_addr = state + isl_dev->ss.aux_addr_offset; 1079 blorp_surface_reloc(batch, state_offset + isl_dev->ss.aux_addr_offset, 1080 surface->aux_addr, *aux_addr); 1081 } 1082 1083 blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4); 1084} 1085 1086static void 1087blorp_emit_null_surface_state(struct blorp_batch *batch, 1088 const struct brw_blorp_surface_info *surface, 1089 uint32_t *state) 1090{ 1091 struct GENX(RENDER_SURFACE_STATE) ss = { 1092 .SurfaceType = SURFTYPE_NULL, 1093 .SurfaceFormat = ISL_FORMAT_R8G8B8A8_UNORM, 1094 .Width = surface->surf.logical_level0_px.width - 1, 1095 .Height = surface->surf.logical_level0_px.height - 1, 1096 .MIPCountLOD = surface->view.base_level, 1097 .MinimumArrayElement = surface->view.base_array_layer, 1098 .Depth = surface->view.array_len - 1, 1099 .RenderTargetViewExtent = surface->view.array_len - 1, 1100 .NumberofMultisamples = ffs(surface->surf.samples) - 1, 1101 1102#if GEN_GEN >= 7 1103 .SurfaceArray = surface->surf.dim != ISL_SURF_DIM_3D, 1104#endif 1105 1106#if GEN_GEN >= 8 1107 .TileMode = YMAJOR, 1108#else 1109 .TiledSurface = true, 1110#endif 1111 }; 1112 1113 GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &ss); 1114 1115 blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4); 1116} 1117 1118static void 1119blorp_emit_surface_states(struct blorp_batch *batch, 1120 const struct blorp_params *params) 1121{ 1122 const struct isl_device *isl_dev = batch->blorp->isl_dev; 1123 uint32_t bind_offset, surface_offsets[2]; 1124 void *surface_maps[2]; 1125 1126 if (params->use_pre_baked_binding_table) { 1127 bind_offset = params->pre_baked_binding_table_offset; 1128 } else { 1129 unsigned num_surfaces = 1 + params->src.enabled; 1130 blorp_alloc_binding_table(batch, num_surfaces, 1131 isl_dev->ss.size, isl_dev->ss.align, 1132 &bind_offset, surface_offsets, surface_maps); 1133 1134 if (params->dst.enabled) { 1135 blorp_emit_surface_state(batch, ¶ms->dst, 1136 surface_maps[BLORP_RENDERBUFFER_BT_INDEX], 1137 surface_offsets[BLORP_RENDERBUFFER_BT_INDEX], 1138 true); 1139 } else { 1140 assert(params->depth.enabled || params->stencil.enabled); 1141 const struct brw_blorp_surface_info *surface = 1142 params->depth.enabled ? ¶ms->depth : ¶ms->stencil; 1143 blorp_emit_null_surface_state(batch, surface, 1144 surface_maps[BLORP_RENDERBUFFER_BT_INDEX]); 1145 } 1146 1147 if (params->src.enabled) { 1148 blorp_emit_surface_state(batch, ¶ms->src, 1149 surface_maps[BLORP_TEXTURE_BT_INDEX], 1150 surface_offsets[BLORP_TEXTURE_BT_INDEX], false); 1151 } 1152 } 1153 1154#if GEN_GEN >= 7 1155 blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), bt); 1156 blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_HS), bt); 1157 blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_DS), bt); 1158 blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_GS), bt); 1159 1160 blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) { 1161 bt.PointertoPSBindingTable = bind_offset; 1162 } 1163#else 1164 blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) { 1165 bt.PSBindingTableChange = true; 1166 bt.PointertoPSBindingTable = bind_offset; 1167 } 1168#endif 1169} 1170 1171static void 1172blorp_emit_sampler_state(struct blorp_batch *batch, 1173 const struct blorp_params *params) 1174{ 1175 struct GENX(SAMPLER_STATE) sampler = { 1176 .MipModeFilter = MIPFILTER_NONE, 1177 .MagModeFilter = MAPFILTER_LINEAR, 1178 .MinModeFilter = MAPFILTER_LINEAR, 1179 .MinLOD = 0, 1180 .MaxLOD = 0, 1181 .TCXAddressControlMode = TCM_CLAMP, 1182 .TCYAddressControlMode = TCM_CLAMP, 1183 .TCZAddressControlMode = TCM_CLAMP, 1184 .MaximumAnisotropy = RATIO21, 1185 .RAddressMinFilterRoundingEnable = true, 1186 .RAddressMagFilterRoundingEnable = true, 1187 .VAddressMinFilterRoundingEnable = true, 1188 .VAddressMagFilterRoundingEnable = true, 1189 .UAddressMinFilterRoundingEnable = true, 1190 .UAddressMagFilterRoundingEnable = true, 1191 .NonnormalizedCoordinateEnable = true, 1192 }; 1193 1194 uint32_t offset; 1195 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_SAMPLER_STATE, 1196 GENX(SAMPLER_STATE_length) * 4, 1197 32, &offset); 1198 GENX(SAMPLER_STATE_pack)(NULL, state, &sampler); 1199 blorp_flush_range(batch, state, GENX(SAMPLER_STATE_length) * 4); 1200 1201#if GEN_GEN >= 7 1202 blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) { 1203 ssp.PointertoPSSamplerState = offset; 1204 } 1205#else 1206 blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) { 1207 ssp.VSSamplerStateChange = true; 1208 ssp.GSSamplerStateChange = true; 1209 ssp.PSSamplerStateChange = true; 1210 ssp.PointertoPSSamplerState = offset; 1211 } 1212#endif 1213} 1214 1215static void 1216blorp_emit_3dstate_multisample(struct blorp_batch *batch, 1217 const struct blorp_params *params) 1218{ 1219 blorp_emit(batch, GENX(3DSTATE_MULTISAMPLE), ms) { 1220 ms.NumberofMultisamples = __builtin_ffs(params->num_samples) - 1; 1221 1222#if GEN_GEN >= 8 1223 /* The PRM says that this bit is valid only for DX9: 1224 * 1225 * SW can choose to set this bit only for DX9 API. DX10/OGL API's 1226 * should not have any effect by setting or not setting this bit. 1227 */ 1228 ms.PixelPositionOffsetEnable = false; 1229 ms.PixelLocation = CENTER; 1230#elif GEN_GEN >= 7 1231 ms.PixelLocation = PIXLOC_CENTER; 1232 1233 switch (params->num_samples) { 1234 case 1: 1235 GEN_SAMPLE_POS_1X(ms.Sample); 1236 break; 1237 case 2: 1238 GEN_SAMPLE_POS_2X(ms.Sample); 1239 break; 1240 case 4: 1241 GEN_SAMPLE_POS_4X(ms.Sample); 1242 break; 1243 case 8: 1244 GEN_SAMPLE_POS_8X(ms.Sample); 1245 break; 1246 default: 1247 break; 1248 } 1249#else 1250 ms.PixelLocation = PIXLOC_CENTER; 1251 GEN_SAMPLE_POS_4X(ms.Sample); 1252#endif 1253 } 1254} 1255 1256#if GEN_GEN >= 8 1257/* Emits the Optimized HiZ sequence specified in the BDW+ PRMs. The 1258 * depth/stencil buffer extents are ignored to handle APIs which perform 1259 * clearing operations without such information. 1260 * */ 1261static void 1262blorp_emit_gen8_hiz_op(struct blorp_batch *batch, 1263 const struct blorp_params *params) 1264{ 1265 /* We should be performing an operation on a depth or stencil buffer. 1266 */ 1267 assert(params->depth.enabled || params->stencil.enabled); 1268 1269 /* The stencil buffer should only be enabled if a fast clear operation is 1270 * requested. 1271 */ 1272 if (params->stencil.enabled) 1273 assert(params->hiz_op == BLORP_HIZ_OP_DEPTH_CLEAR); 1274 1275 /* If we can't alter the depth stencil config and multiple layers are 1276 * involved, the HiZ op will fail. This is because the op requires that a 1277 * new config is emitted for each additional layer. 1278 */ 1279 if (batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL) { 1280 assert(params->num_layers <= 1); 1281 } else { 1282 blorp_emit_depth_stencil_config(batch, params); 1283 } 1284 1285 blorp_emit(batch, GENX(3DSTATE_WM_HZ_OP), hzp) { 1286 switch (params->hiz_op) { 1287 case BLORP_HIZ_OP_DEPTH_CLEAR: 1288 hzp.StencilBufferClearEnable = params->stencil.enabled; 1289 hzp.DepthBufferClearEnable = params->depth.enabled; 1290 hzp.StencilClearValue = params->stencil_ref; 1291 break; 1292 case BLORP_HIZ_OP_DEPTH_RESOLVE: 1293 hzp.DepthBufferResolveEnable = true; 1294 break; 1295 case BLORP_HIZ_OP_HIZ_RESOLVE: 1296 hzp.HierarchicalDepthBufferResolveEnable = true; 1297 break; 1298 case BLORP_HIZ_OP_NONE: 1299 unreachable("Invalid HIZ op"); 1300 } 1301 1302 hzp.NumberofMultisamples = ffs(params->num_samples) - 1; 1303 hzp.SampleMask = 0xFFFF; 1304 1305 /* Due to a hardware issue, this bit MBZ */ 1306 assert(hzp.ScissorRectangleEnable == false); 1307 1308 /* Contrary to the HW docs both fields are inclusive */ 1309 hzp.ClearRectangleXMin = params->x0; 1310 hzp.ClearRectangleYMin = params->y0; 1311 1312 /* Contrary to the HW docs both fields are exclusive */ 1313 hzp.ClearRectangleXMax = params->x1; 1314 hzp.ClearRectangleYMax = params->y1; 1315 } 1316 1317 /* PIPE_CONTROL w/ all bits clear except for “Post-Sync Operation” must set 1318 * to “Write Immediate Data” enabled. 1319 */ 1320 blorp_emit(batch, GENX(PIPE_CONTROL), pc) { 1321 pc.PostSyncOperation = WriteImmediateData; 1322 } 1323 1324 blorp_emit(batch, GENX(3DSTATE_WM_HZ_OP), hzp); 1325 1326 /* Perform depth clear specific flushing */ 1327 if (params->hiz_op == BLORP_HIZ_OP_DEPTH_CLEAR && params->depth.enabled) { 1328 blorp_emit(batch, GENX(PIPE_CONTROL), pc) { 1329 pc.DepthStallEnable = true; 1330 pc.DepthCacheFlushEnable = true; 1331 } 1332 } 1333} 1334#endif 1335 1336/* 3DSTATE_VIEWPORT_STATE_POINTERS */ 1337static void 1338blorp_emit_viewport_state(struct blorp_batch *batch, 1339 const struct blorp_params *params) 1340{ 1341 uint32_t cc_vp_offset; 1342 1343 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_CC_VP_STATE, 1344 GENX(CC_VIEWPORT_length) * 4, 32, 1345 &cc_vp_offset); 1346 1347 GENX(CC_VIEWPORT_pack)(batch, state, 1348 &(struct GENX(CC_VIEWPORT)) { 1349 .MinimumDepth = 0.0, 1350 .MaximumDepth = 1.0, 1351 }); 1352 blorp_flush_range(batch, state, GENX(CC_VIEWPORT_length) * 4); 1353 1354#if GEN_GEN >= 7 1355 blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) { 1356 vsp.CCViewportPointer = cc_vp_offset; 1357 } 1358#else 1359 blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) { 1360 vsp.CCViewportStateChange = true; 1361 vsp.PointertoCC_VIEWPORT = cc_vp_offset; 1362 } 1363#endif 1364} 1365 1366 1367/** 1368 * \brief Execute a blit or render pass operation. 1369 * 1370 * To execute the operation, this function manually constructs and emits a 1371 * batch to draw a rectangle primitive. The batchbuffer is flushed before 1372 * constructing and after emitting the batch. 1373 * 1374 * This function alters no GL state. 1375 */ 1376static void 1377blorp_exec(struct blorp_batch *batch, const struct blorp_params *params) 1378{ 1379 uint32_t blend_state_offset = 0; 1380 uint32_t color_calc_state_offset = 0; 1381 uint32_t depth_stencil_state_offset; 1382 1383#if GEN_GEN >= 8 1384 if (params->hiz_op != BLORP_HIZ_OP_NONE) { 1385 blorp_emit_gen8_hiz_op(batch, params); 1386 return; 1387 } 1388#endif 1389 1390 blorp_emit_vertex_buffers(batch, params); 1391 blorp_emit_vertex_elements(batch, params); 1392 1393 emit_urb_config(batch, params); 1394 1395 if (params->wm_prog_data) { 1396 blend_state_offset = blorp_emit_blend_state(batch, params); 1397 } 1398 color_calc_state_offset = blorp_emit_color_calc_state(batch, params); 1399 depth_stencil_state_offset = blorp_emit_depth_stencil_state(batch, params); 1400 1401#if GEN_GEN <= 6 1402 /* 3DSTATE_CC_STATE_POINTERS 1403 * 1404 * The pointer offsets are relative to 1405 * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. 1406 * 1407 * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE. 1408 * 1409 * The dynamic state emit helpers emit their own STATE_POINTERS packets on 1410 * gen7+. However, on gen6 and earlier, they're all lumpped together in 1411 * one CC_STATE_POINTERS packet so we have to emit that here. 1412 */ 1413 blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), cc) { 1414 cc.BLEND_STATEChange = true; 1415 cc.COLOR_CALC_STATEChange = true; 1416 cc.DEPTH_STENCIL_STATEChange = true; 1417 cc.PointertoBLEND_STATE = blend_state_offset; 1418 cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset; 1419 cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset; 1420 } 1421#else 1422 (void)blend_state_offset; 1423 (void)color_calc_state_offset; 1424 (void)depth_stencil_state_offset; 1425#endif 1426 1427 blorp_emit(batch, GENX(3DSTATE_CONSTANT_VS), vs); 1428#if GEN_GEN >= 7 1429 blorp_emit(batch, GENX(3DSTATE_CONSTANT_HS), hs); 1430 blorp_emit(batch, GENX(3DSTATE_CONSTANT_DS), DS); 1431#endif 1432 blorp_emit(batch, GENX(3DSTATE_CONSTANT_GS), gs); 1433 blorp_emit(batch, GENX(3DSTATE_CONSTANT_PS), ps); 1434 1435 blorp_emit_surface_states(batch, params); 1436 1437 if (params->src.enabled) 1438 blorp_emit_sampler_state(batch, params); 1439 1440 blorp_emit_3dstate_multisample(batch, params); 1441 1442 blorp_emit(batch, GENX(3DSTATE_SAMPLE_MASK), mask) { 1443 mask.SampleMask = (1 << params->num_samples) - 1; 1444 } 1445 1446 /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State, 1447 * 3DSTATE_VS, Dword 5.0 "VS Function Enable": 1448 * 1449 * [DevSNB] A pipeline flush must be programmed prior to a 1450 * 3DSTATE_VS command that causes the VS Function Enable to 1451 * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL 1452 * command with CS stall bit set and a post sync operation. 1453 * 1454 * We've already done one at the start of the BLORP operation. 1455 */ 1456 blorp_emit_vs_config(batch, params); 1457#if GEN_GEN >= 7 1458 blorp_emit(batch, GENX(3DSTATE_HS), hs); 1459 blorp_emit(batch, GENX(3DSTATE_TE), te); 1460 blorp_emit(batch, GENX(3DSTATE_DS), DS); 1461 blorp_emit(batch, GENX(3DSTATE_STREAMOUT), so); 1462#endif 1463 blorp_emit(batch, GENX(3DSTATE_GS), gs); 1464 1465 blorp_emit(batch, GENX(3DSTATE_CLIP), clip) { 1466 clip.PerspectiveDivideDisable = true; 1467 } 1468 1469 blorp_emit_sf_config(batch, params); 1470 blorp_emit_ps_config(batch, params); 1471 1472 blorp_emit_viewport_state(batch, params); 1473 1474 if (!(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL)) 1475 blorp_emit_depth_stencil_config(batch, params); 1476 1477 blorp_emit(batch, GENX(3DPRIMITIVE), prim) { 1478 prim.VertexAccessType = SEQUENTIAL; 1479 prim.PrimitiveTopologyType = _3DPRIM_RECTLIST; 1480 prim.VertexCountPerInstance = 3; 1481 prim.InstanceCount = params->num_layers; 1482 } 1483} 1484 1485#endif /* BLORP_GENX_EXEC_H */ 1486