gen7_blorp.cpp revision 127dc6d136db64fcf9448d66cb4c86db3bb11226
1/* 2 * Copyright © 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <assert.h> 25 26#include "intel_batchbuffer.h" 27#include "intel_fbo.h" 28#include "intel_mipmap_tree.h" 29 30#include "brw_context.h" 31#include "brw_defines.h" 32#include "brw_state.h" 33 34#include "brw_blorp.h" 35#include "gen7_blorp.h" 36 37 38/* 3DSTATE_URB_VS 39 * 3DSTATE_URB_HS 40 * 3DSTATE_URB_DS 41 * 3DSTATE_URB_GS 42 * 43 * If the 3DSTATE_URB_VS is emitted, than the others must be also. From the 44 * BSpec, Volume 2a "3D Pipeline Overview", Section 1.7.1 3DSTATE_URB_VS: 45 * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be 46 * programmed in order for the programming of this state to be 47 * valid. 48 */ 49static void 50gen7_blorp_emit_urb_config(struct brw_context *brw, 51 const brw_blorp_params *params) 52{ 53 /* The minimum valid value is 32. See 3DSTATE_URB_VS, 54 * Dword 1.15:0 "VS Number of URB Entries". 55 */ 56 int num_vs_entries = 32; 57 int vs_size = 2; 58 int vs_start = 2; /* skip over push constants */ 59 60 gen7_emit_urb_state(brw, num_vs_entries, vs_size, vs_start); 61} 62 63 64/* 3DSTATE_BLEND_STATE_POINTERS */ 65static void 66gen7_blorp_emit_blend_state_pointer(struct brw_context *brw, 67 const brw_blorp_params *params, 68 uint32_t cc_blend_state_offset) 69{ 70 struct intel_context *intel = &brw->intel; 71 72 BEGIN_BATCH(2); 73 OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS << 16 | (2 - 2)); 74 OUT_BATCH(cc_blend_state_offset | 1); 75 ADVANCE_BATCH(); 76} 77 78 79/* 3DSTATE_CC_STATE_POINTERS */ 80static void 81gen7_blorp_emit_cc_state_pointer(struct brw_context *brw, 82 const brw_blorp_params *params, 83 uint32_t cc_state_offset) 84{ 85 struct intel_context *intel = &brw->intel; 86 87 BEGIN_BATCH(2); 88 OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2)); 89 OUT_BATCH(cc_state_offset | 1); 90 ADVANCE_BATCH(); 91} 92 93static void 94gen7_blorp_emit_cc_viewport(struct brw_context *brw, 95 const brw_blorp_params *params) 96{ 97 struct intel_context *intel = &brw->intel; 98 struct brw_cc_viewport *ccv; 99 uint32_t cc_vp_offset; 100 101 ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE, 102 sizeof(*ccv), 32, 103 &cc_vp_offset); 104 ccv->min_depth = 0.0; 105 ccv->max_depth = 1.0; 106 107 BEGIN_BATCH(2); 108 OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_CC << 16 | (2 - 2)); 109 OUT_BATCH(cc_vp_offset); 110 ADVANCE_BATCH(); 111} 112 113 114/* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS 115 * 116 * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. 117 */ 118static void 119gen7_blorp_emit_depth_stencil_state_pointers(struct brw_context *brw, 120 const brw_blorp_params *params, 121 uint32_t depthstencil_offset) 122{ 123 struct intel_context *intel = &brw->intel; 124 125 BEGIN_BATCH(2); 126 OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2)); 127 OUT_BATCH(depthstencil_offset | 1); 128 ADVANCE_BATCH(); 129} 130 131 132/* SURFACE_STATE for renderbuffer or texture surface (see 133 * brw_update_renderbuffer_surface and brw_update_texture_surface) 134 */ 135static uint32_t 136gen7_blorp_emit_surface_state(struct brw_context *brw, 137 const brw_blorp_params *params, 138 const brw_blorp_surface_info *surface, 139 uint32_t read_domains, uint32_t write_domain, 140 bool is_render_target) 141{ 142 struct intel_context *intel = &brw->intel; 143 144 uint32_t wm_surf_offset; 145 uint32_t width = surface->width; 146 uint32_t height = surface->height; 147 /* Note: since gen7 uses INTEL_MSAA_LAYOUT_CMS or INTEL_MSAA_LAYOUT_UMS for 148 * color surfaces, width and height are measured in pixels; we don't need 149 * to divide them by 2 as we do for Gen6 (see 150 * gen6_blorp_emit_surface_state). 151 */ 152 struct intel_region *region = surface->mt->region; 153 154 struct gen7_surface_state *surf = (struct gen7_surface_state *) 155 brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, sizeof(*surf), 32, 156 &wm_surf_offset); 157 memset(surf, 0, sizeof(*surf)); 158 159 if (surface->mt->align_h == 4) 160 surf->ss0.vertical_alignment = 1; 161 if (surface->mt->align_w == 8) 162 surf->ss0.horizontal_alignment = 1; 163 164 surf->ss0.surface_format = surface->brw_surfaceformat; 165 surf->ss0.surface_type = BRW_SURFACE_2D; 166 surf->ss0.surface_array_spacing = surface->array_spacing_lod0 ? 167 GEN7_SURFACE_ARYSPC_LOD0 : GEN7_SURFACE_ARYSPC_FULL; 168 169 /* reloc */ 170 surf->ss1.base_addr = region->bo->offset; /* No tile offsets needed */ 171 172 surf->ss2.width = width - 1; 173 surf->ss2.height = height - 1; 174 175 uint32_t tiling = surface->map_stencil_as_y_tiled 176 ? I915_TILING_Y : region->tiling; 177 gen7_set_surface_tiling(surf, tiling); 178 179 uint32_t pitch_bytes = region->pitch * region->cpp; 180 if (surface->map_stencil_as_y_tiled) 181 pitch_bytes *= 2; 182 surf->ss3.pitch = pitch_bytes - 1; 183 184 gen7_set_surface_msaa(surf, surface->num_samples, surface->msaa_layout); 185 if (surface->msaa_layout == INTEL_MSAA_LAYOUT_CMS) { 186 gen7_set_surface_mcs_info(brw, surf, wm_surf_offset, 187 surface->mt->mcs_mt, is_render_target); 188 } 189 190 if (intel->is_haswell) { 191 surf->ss7.shader_channel_select_r = HSW_SCS_RED; 192 surf->ss7.shader_channel_select_g = HSW_SCS_GREEN; 193 surf->ss7.shader_channel_select_b = HSW_SCS_BLUE; 194 surf->ss7.shader_channel_select_a = HSW_SCS_ALPHA; 195 } 196 197 /* Emit relocation to surface contents */ 198 drm_intel_bo_emit_reloc(brw->intel.batch.bo, 199 wm_surf_offset + 200 offsetof(struct gen7_surface_state, ss1), 201 region->bo, 202 surf->ss1.base_addr - region->bo->offset, 203 read_domains, write_domain); 204 205 gen7_check_surface_setup(surf, is_render_target); 206 207 return wm_surf_offset; 208} 209 210 211/** 212 * SAMPLER_STATE. See gen7_update_sampler_state(). 213 */ 214static uint32_t 215gen7_blorp_emit_sampler_state(struct brw_context *brw, 216 const brw_blorp_params *params) 217{ 218 uint32_t sampler_offset; 219 220 struct gen7_sampler_state *sampler = (struct gen7_sampler_state *) 221 brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, 222 sizeof(struct gen7_sampler_state), 223 32, &sampler_offset); 224 memset(sampler, 0, sizeof(*sampler)); 225 226 sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; 227 sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; 228 sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; 229 230 sampler->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 231 sampler->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 232 sampler->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 233 234 // sampler->ss0.min_mag_neq = 1; 235 236 /* Set LOD bias: 237 */ 238 sampler->ss0.lod_bias = 0; 239 240 sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ 241 sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ 242 243 /* Set BaseMipLevel, MaxLOD, MinLOD: 244 * 245 * XXX: I don't think that using firstLevel, lastLevel works, 246 * because we always setup the surface state as if firstLevel == 247 * level zero. Probably have to subtract firstLevel from each of 248 * these: 249 */ 250 sampler->ss0.base_level = U_FIXED(0, 1); 251 252 sampler->ss1.max_lod = U_FIXED(0, 8); 253 sampler->ss1.min_lod = U_FIXED(0, 8); 254 255 sampler->ss3.non_normalized_coord = 1; 256 257 sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MIN | 258 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN | 259 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN; 260 sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MAG | 261 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG | 262 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG; 263 264 return sampler_offset; 265} 266 267 268/* 3DSTATE_HS 269 * 270 * Disable the hull shader. 271 */ 272static void 273gen7_blorp_emit_hs_disable(struct brw_context *brw, 274 const brw_blorp_params *params) 275{ 276 struct intel_context *intel = &brw->intel; 277 278 BEGIN_BATCH(7); 279 OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2)); 280 OUT_BATCH(0); 281 OUT_BATCH(0); 282 OUT_BATCH(0); 283 OUT_BATCH(0); 284 OUT_BATCH(0); 285 OUT_BATCH(0); 286 ADVANCE_BATCH(); 287} 288 289 290/* 3DSTATE_TE 291 * 292 * Disable the tesselation engine. 293 */ 294static void 295gen7_blorp_emit_te_disable(struct brw_context *brw, 296 const brw_blorp_params *params) 297{ 298 struct intel_context *intel = &brw->intel; 299 300 BEGIN_BATCH(4); 301 OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2)); 302 OUT_BATCH(0); 303 OUT_BATCH(0); 304 OUT_BATCH(0); 305 ADVANCE_BATCH(); 306} 307 308 309/* 3DSTATE_DS 310 * 311 * Disable the domain shader. 312 */ 313static void 314gen7_blorp_emit_ds_disable(struct brw_context *brw, 315 const brw_blorp_params *params) 316{ 317 struct intel_context *intel = &brw->intel; 318 319 BEGIN_BATCH(6); 320 OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2)); 321 OUT_BATCH(0); 322 OUT_BATCH(0); 323 OUT_BATCH(0); 324 OUT_BATCH(0); 325 OUT_BATCH(0); 326 ADVANCE_BATCH(); 327} 328 329 330/* 3DSTATE_STREAMOUT 331 * 332 * Disable streamout. 333 */ 334static void 335gen7_blorp_emit_streamout_disable(struct brw_context *brw, 336 const brw_blorp_params *params) 337{ 338 struct intel_context *intel = &brw->intel; 339 340 BEGIN_BATCH(3); 341 OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2)); 342 OUT_BATCH(0); 343 OUT_BATCH(0); 344 ADVANCE_BATCH(); 345} 346 347 348static void 349gen7_blorp_emit_sf_config(struct brw_context *brw, 350 const brw_blorp_params *params) 351{ 352 struct intel_context *intel = &brw->intel; 353 354 /* 3DSTATE_SF 355 * 356 * Disable ViewportTransformEnable (dw1.1) 357 * 358 * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D 359 * Primitives Overview": 360 * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the 361 * use of screen- space coordinates). 362 * 363 * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5) 364 * and BackFaceFillMode (dw1.4:3) to SOLID(0). 365 * 366 * From the Sandy Bridge PRM, Volume 2, Part 1, Section 367 * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: 368 * SOLID: Any triangle or rectangle object found to be front-facing 369 * is rendered as a solid object. This setting is required when 370 * (rendering rectangle (RECTLIST) objects. 371 */ 372 { 373 BEGIN_BATCH(7); 374 OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2)); 375 OUT_BATCH(params->depth_format << 376 GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT); 377 OUT_BATCH(params->num_samples > 1 ? GEN6_SF_MSRAST_ON_PATTERN : 0); 378 OUT_BATCH(0); 379 OUT_BATCH(0); 380 OUT_BATCH(0); 381 OUT_BATCH(0); 382 ADVANCE_BATCH(); 383 } 384 385 /* 3DSTATE_SBE */ 386 { 387 BEGIN_BATCH(14); 388 OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2)); 389 OUT_BATCH((1 - 1) << GEN7_SBE_NUM_OUTPUTS_SHIFT | /* only position */ 390 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | 391 0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT); 392 for (int i = 0; i < 12; ++i) 393 OUT_BATCH(0); 394 ADVANCE_BATCH(); 395 } 396} 397 398 399/** 400 * Disable thread dispatch (dw5.19) and enable the HiZ op. 401 */ 402static void 403gen7_blorp_emit_wm_config(struct brw_context *brw, 404 const brw_blorp_params *params, 405 brw_blorp_prog_data *prog_data) 406{ 407 struct intel_context *intel = &brw->intel; 408 409 uint32_t dw1 = 0, dw2 = 0; 410 411 switch (params->hiz_op) { 412 case GEN6_HIZ_OP_DEPTH_CLEAR: 413 dw1 |= GEN7_WM_DEPTH_CLEAR; 414 break; 415 case GEN6_HIZ_OP_DEPTH_RESOLVE: 416 dw1 |= GEN7_WM_DEPTH_RESOLVE; 417 break; 418 case GEN6_HIZ_OP_HIZ_RESOLVE: 419 dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE; 420 break; 421 case GEN6_HIZ_OP_NONE: 422 break; 423 default: 424 assert(0); 425 break; 426 } 427 dw1 |= GEN7_WM_STATISTICS_ENABLE; 428 dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0; 429 dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5; 430 dw1 |= 0 << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */ 431 if (params->use_wm_prog) { 432 dw1 |= GEN7_WM_KILL_ENABLE; /* TODO: temporarily smash on */ 433 dw1 |= GEN7_WM_DISPATCH_ENABLE; /* We are rendering */ 434 } 435 436 if (params->num_samples > 1) { 437 dw1 |= GEN7_WM_MSRAST_ON_PATTERN; 438 if (prog_data && prog_data->persample_msaa_dispatch) 439 dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; 440 else 441 dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL; 442 } else { 443 dw1 |= GEN7_WM_MSRAST_OFF_PIXEL; 444 dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; 445 } 446 447 BEGIN_BATCH(3); 448 OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); 449 OUT_BATCH(dw1); 450 OUT_BATCH(dw2); 451 ADVANCE_BATCH(); 452} 453 454 455/** 456 * 3DSTATE_PS 457 * 458 * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite 459 * that, thread dispatch info must still be specified. 460 * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the BSpec 461 * states that the valid range for this field is [0x3, 0x2f]. 462 * - A dispatch mode must be given; that is, at least one of the 463 * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was 464 * discovered through simulator error messages. 465 */ 466static void 467gen7_blorp_emit_ps_config(struct brw_context *brw, 468 const brw_blorp_params *params, 469 uint32_t prog_offset, 470 brw_blorp_prog_data *prog_data) 471{ 472 struct intel_context *intel = &brw->intel; 473 uint32_t dw2, dw4, dw5; 474 const int max_threads_shift = brw->intel.is_haswell ? 475 HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; 476 477 dw2 = dw4 = dw5 = 0; 478 dw4 |= (brw->max_wm_threads - 1) << max_threads_shift; 479 480 /* If there's a WM program, we need to do 16-pixel dispatch since that's 481 * what the program is compiled for. If there isn't, then it shouldn't 482 * matter because no program is actually being run. However, the hardware 483 * gets angry if we don't enable at least one dispatch mode, so just enable 484 * 16-pixel dispatch unconditionally. 485 */ 486 dw4 |= GEN7_PS_16_DISPATCH_ENABLE; 487 488 if (intel->is_haswell) 489 dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */ 490 if (params->use_wm_prog) { 491 dw2 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */ 492 dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE; 493 dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; 494 } 495 496 BEGIN_BATCH(8); 497 OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); 498 OUT_BATCH(params->use_wm_prog ? prog_offset : 0); 499 OUT_BATCH(dw2); 500 OUT_BATCH(0); 501 OUT_BATCH(dw4); 502 OUT_BATCH(dw5); 503 OUT_BATCH(0); 504 OUT_BATCH(0); 505 ADVANCE_BATCH(); 506} 507 508 509static void 510gen7_blorp_emit_binding_table_pointers_ps(struct brw_context *brw, 511 const brw_blorp_params *params, 512 uint32_t wm_bind_bo_offset) 513{ 514 struct intel_context *intel = &brw->intel; 515 516 BEGIN_BATCH(2); 517 OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2)); 518 OUT_BATCH(wm_bind_bo_offset); 519 ADVANCE_BATCH(); 520} 521 522 523static void 524gen7_blorp_emit_sampler_state_pointers_ps(struct brw_context *brw, 525 const brw_blorp_params *params, 526 uint32_t sampler_offset) 527{ 528 struct intel_context *intel = &brw->intel; 529 530 BEGIN_BATCH(2); 531 OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2)); 532 OUT_BATCH(sampler_offset); 533 ADVANCE_BATCH(); 534} 535 536 537static void 538gen7_blorp_emit_constant_ps(struct brw_context *brw, 539 const brw_blorp_params *params, 540 uint32_t wm_push_const_offset) 541{ 542 struct intel_context *intel = &brw->intel; 543 544 /* Make sure the push constants fill an exact integer number of 545 * registers. 546 */ 547 assert(sizeof(brw_blorp_wm_push_constants) % 32 == 0); 548 549 /* There must be at least one register worth of push constant data. */ 550 assert(BRW_BLORP_NUM_PUSH_CONST_REGS > 0); 551 552 /* Enable push constant buffer 0. */ 553 BEGIN_BATCH(7); 554 OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | 555 (7 - 2)); 556 OUT_BATCH(BRW_BLORP_NUM_PUSH_CONST_REGS); 557 OUT_BATCH(0); 558 OUT_BATCH(wm_push_const_offset); 559 OUT_BATCH(0); 560 OUT_BATCH(0); 561 OUT_BATCH(0); 562 ADVANCE_BATCH(); 563} 564 565 566static void 567gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, 568 const brw_blorp_params *params) 569{ 570 struct intel_context *intel = &brw->intel; 571 uint32_t draw_x = params->depth.x_offset; 572 uint32_t draw_y = params->depth.y_offset; 573 uint32_t tile_mask_x, tile_mask_y; 574 575 gen6_blorp_compute_tile_masks(params, &tile_mask_x, &tile_mask_y); 576 577 /* 3DSTATE_DEPTH_BUFFER */ 578 { 579 uint32_t tile_x = draw_x & tile_mask_x; 580 uint32_t tile_y = draw_y & tile_mask_y; 581 uint32_t offset = 582 intel_region_get_aligned_offset(params->depth.mt->region, 583 draw_x & ~tile_mask_x, 584 draw_y & ~tile_mask_y); 585 586 /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 587 * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth 588 * Coordinate Offset X/Y": 589 * 590 * "The 3 LSBs of both offsets must be zero to ensure correct 591 * alignment" 592 * 593 * We have no guarantee that tile_x and tile_y are correctly aligned, 594 * since they are determined by the mipmap layout, which is only aligned 595 * to multiples of 4. 596 * 597 * So, to avoid hanging the GPU, just smash the low order 3 bits of 598 * tile_x and tile_y to 0. This is a temporary workaround until we come 599 * up with a better solution. 600 */ 601 tile_x &= ~7; 602 tile_y &= ~7; 603 604 intel_emit_depth_stall_flushes(intel); 605 606 BEGIN_BATCH(7); 607 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); 608 uint32_t pitch_bytes = 609 params->depth.mt->region->pitch * params->depth.mt->region->cpp; 610 OUT_BATCH((pitch_bytes - 1) | 611 params->depth_format << 18 | 612 1 << 22 | /* hiz enable */ 613 1 << 28 | /* depth write */ 614 BRW_SURFACE_2D << 29); 615 OUT_RELOC(params->depth.mt->region->bo, 616 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 617 offset); 618 OUT_BATCH((params->depth.width + tile_x - 1) << 4 | 619 (params->depth.height + tile_y - 1) << 18); 620 OUT_BATCH(0); 621 OUT_BATCH(tile_x | 622 tile_y << 16); 623 OUT_BATCH(0); 624 ADVANCE_BATCH(); 625 } 626 627 /* 3DSTATE_HIER_DEPTH_BUFFER */ 628 { 629 struct intel_region *hiz_region = params->depth.mt->hiz_mt->region; 630 uint32_t hiz_offset = 631 intel_region_get_aligned_offset(hiz_region, 632 draw_x & ~tile_mask_x, 633 (draw_y & ~tile_mask_y) / 2); 634 635 BEGIN_BATCH(3); 636 OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); 637 OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1); 638 OUT_RELOC(hiz_region->bo, 639 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 640 hiz_offset); 641 ADVANCE_BATCH(); 642 } 643 644 /* 3DSTATE_STENCIL_BUFFER */ 645 { 646 BEGIN_BATCH(3); 647 OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); 648 OUT_BATCH(0); 649 OUT_BATCH(0); 650 ADVANCE_BATCH(); 651 } 652} 653 654 655static void 656gen7_blorp_emit_depth_disable(struct brw_context *brw, 657 const brw_blorp_params *params) 658{ 659 struct intel_context *intel = &brw->intel; 660 661 BEGIN_BATCH(7); 662 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); 663 OUT_BATCH(BRW_DEPTHFORMAT_D32_FLOAT << 18 | (BRW_SURFACE_NULL << 29)); 664 OUT_BATCH(0); 665 OUT_BATCH(0); 666 OUT_BATCH(0); 667 OUT_BATCH(0); 668 OUT_BATCH(0); 669 ADVANCE_BATCH(); 670} 671 672 673/* 3DSTATE_CLEAR_PARAMS 674 * 675 * From the BSpec, Volume 2a.11 Windower, Section 1.5.6.3.2 676 * 3DSTATE_CLEAR_PARAMS: 677 * [DevIVB] 3DSTATE_CLEAR_PARAMS must always be programmed in the along 678 * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER, 679 * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER). 680 */ 681static void 682gen7_blorp_emit_clear_params(struct brw_context *brw, 683 const brw_blorp_params *params) 684{ 685 struct intel_context *intel = &brw->intel; 686 687 BEGIN_BATCH(3); 688 OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2)); 689 OUT_BATCH(params->depth.mt ? params->depth.mt->depth_clear_value : 0); 690 OUT_BATCH(GEN7_DEPTH_CLEAR_VALID); 691 ADVANCE_BATCH(); 692} 693 694 695/* 3DPRIMITIVE */ 696static void 697gen7_blorp_emit_primitive(struct brw_context *brw, 698 const brw_blorp_params *params) 699{ 700 struct intel_context *intel = &brw->intel; 701 702 BEGIN_BATCH(7); 703 OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); 704 OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL | 705 _3DPRIM_RECTLIST); 706 OUT_BATCH(3); /* vertex count per instance */ 707 OUT_BATCH(0); 708 OUT_BATCH(1); /* instance count */ 709 OUT_BATCH(0); 710 OUT_BATCH(0); 711 ADVANCE_BATCH(); 712} 713 714 715/** 716 * \copydoc gen6_blorp_exec() 717 */ 718void 719gen7_blorp_exec(struct intel_context *intel, 720 const brw_blorp_params *params) 721{ 722 struct gl_context *ctx = &intel->ctx; 723 struct brw_context *brw = brw_context(ctx); 724 brw_blorp_prog_data *prog_data = NULL; 725 uint32_t cc_blend_state_offset = 0; 726 uint32_t cc_state_offset = 0; 727 uint32_t depthstencil_offset; 728 uint32_t wm_push_const_offset = 0; 729 uint32_t wm_bind_bo_offset = 0; 730 uint32_t sampler_offset = 0; 731 732 uint32_t prog_offset = params->get_wm_prog(brw, &prog_data); 733 gen6_blorp_emit_batch_head(brw, params); 734 gen7_allocate_push_constants(brw); 735 gen6_emit_3dstate_multisample(brw, params->num_samples); 736 gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false); 737 gen6_blorp_emit_state_base_address(brw, params); 738 gen6_blorp_emit_vertices(brw, params); 739 gen7_blorp_emit_urb_config(brw, params); 740 if (params->use_wm_prog) { 741 cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params); 742 cc_state_offset = gen6_blorp_emit_cc_state(brw, params); 743 gen7_blorp_emit_blend_state_pointer(brw, params, cc_blend_state_offset); 744 gen7_blorp_emit_cc_state_pointer(brw, params, cc_state_offset); 745 } 746 depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params); 747 gen7_blorp_emit_depth_stencil_state_pointers(brw, params, 748 depthstencil_offset); 749 if (params->use_wm_prog) { 750 uint32_t wm_surf_offset_renderbuffer; 751 uint32_t wm_surf_offset_texture; 752 wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params); 753 wm_surf_offset_renderbuffer = 754 gen7_blorp_emit_surface_state(brw, params, ¶ms->dst, 755 I915_GEM_DOMAIN_RENDER, 756 I915_GEM_DOMAIN_RENDER, 757 true /* is_render_target */); 758 wm_surf_offset_texture = 759 gen7_blorp_emit_surface_state(brw, params, ¶ms->src, 760 I915_GEM_DOMAIN_SAMPLER, 0, 761 false /* is_render_target */); 762 wm_bind_bo_offset = 763 gen6_blorp_emit_binding_table(brw, params, 764 wm_surf_offset_renderbuffer, 765 wm_surf_offset_texture); 766 sampler_offset = gen7_blorp_emit_sampler_state(brw, params); 767 } 768 gen6_blorp_emit_vs_disable(brw, params); 769 gen7_blorp_emit_hs_disable(brw, params); 770 gen7_blorp_emit_te_disable(brw, params); 771 gen7_blorp_emit_ds_disable(brw, params); 772 gen6_blorp_emit_gs_disable(brw, params); 773 gen7_blorp_emit_streamout_disable(brw, params); 774 gen6_blorp_emit_clip_disable(brw, params); 775 gen7_blorp_emit_sf_config(brw, params); 776 gen7_blorp_emit_wm_config(brw, params, prog_data); 777 if (params->use_wm_prog) { 778 gen7_blorp_emit_binding_table_pointers_ps(brw, params, 779 wm_bind_bo_offset); 780 gen7_blorp_emit_sampler_state_pointers_ps(brw, params, sampler_offset); 781 gen7_blorp_emit_constant_ps(brw, params, wm_push_const_offset); 782 } 783 gen7_blorp_emit_ps_config(brw, params, prog_offset, prog_data); 784 gen7_blorp_emit_cc_viewport(brw, params); 785 786 if (params->depth.mt) 787 gen7_blorp_emit_depth_stencil_config(brw, params); 788 else 789 gen7_blorp_emit_depth_disable(brw, params); 790 gen7_blorp_emit_clear_params(brw, params); 791 gen6_blorp_emit_drawing_rectangle(brw, params); 792 gen7_blorp_emit_primitive(brw, params); 793 794 /* See comments above at first invocation of intel_flush() in 795 * gen6_blorp_emit_batch_head(). 796 */ 797 intel_flush(ctx); 798 799 /* Be safe. */ 800 brw->state.dirty.brw = ~0; 801 brw->state.dirty.cache = ~0; 802} 803