genX_blorp_exec.c revision 7b035fd0c97939a65825f6e1b467b0d741382bc5
1/*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25
26#include "intel_batchbuffer.h"
27#include "intel_mipmap_tree.h"
28
29#include "brw_context.h"
30#include "brw_state.h"
31
32#include "blorp_priv.h"
33
34#include "genxml/gen_macros.h"
35
36static void *
37blorp_emit_dwords(struct brw_context *brw, unsigned n)
38{
39   intel_batchbuffer_begin(brw, n, RENDER_RING);
40   uint32_t *map = brw->batch.map_next;
41   brw->batch.map_next += n;
42   intel_batchbuffer_advance(brw);
43   return map;
44}
45
46struct blorp_address {
47   drm_intel_bo *buffer;
48   uint32_t read_domains;
49   uint32_t write_domain;
50   uint32_t offset;
51};
52
53static uint64_t
54blorp_emit_reloc(struct brw_context *brw, void *location,
55                 struct blorp_address address, uint32_t delta)
56{
57   uint32_t offset = (char *)location - (char *)brw->batch.map;
58   if (brw->gen >= 8) {
59      return intel_batchbuffer_reloc64(brw, address.buffer, offset,
60                                       address.read_domains,
61                                       address.write_domain,
62                                       address.offset + delta);
63   } else {
64      return intel_batchbuffer_reloc(brw, address.buffer, offset,
65                                     address.read_domains,
66                                     address.write_domain,
67                                     address.offset + delta);
68   }
69}
70
71#define __gen_address_type struct blorp_address
72#define __gen_user_data struct brw_context
73
74static uint64_t
75__gen_combine_address(struct brw_context *brw, void *location,
76                      struct blorp_address address, uint32_t delta)
77{
78   if (address.buffer == NULL) {
79      return address.offset + delta;
80   } else {
81      return blorp_emit_reloc(brw, location, address, delta);
82   }
83}
84
85#include "genxml/genX_pack.h"
86
/* Suffix-pasting helpers.  They are kept as separate macros so that a
 * command argument such as GENX(3DSTATE_SF) is macro-expanded before the
 * suffix is appended.
 */
#define _blorp_cmd_length(cmd) cmd ## _length
#define _blorp_cmd_header(cmd) cmd ## _header
#define _blorp_cmd_pack(cmd) cmd ## _pack

/**
 * Emit one command into the batch.
 *
 * Declares "struct cmd name" initialized from the command's header, reserves
 * the command's length in dwords, and runs the statement or block that
 * follows exactly once so the caller can fill in fields.  After that body
 * finishes, the struct is packed into the reserved dwords.
 *
 * The body is a for-loop body: leaving it with "break" skips the pack step.
 */
#define blorp_emit(brw, cmd, name)                                \
   for (struct cmd name = { _blorp_cmd_header(cmd) },             \
        *_dst = blorp_emit_dwords(brw, _blorp_cmd_length(cmd));   \
        __builtin_expect(_dst != NULL, 1);                        \
        _blorp_cmd_pack(cmd)(brw, (void *)_dst, &name),           \
        _dst = NULL)
97
98static void
99blorp_emit_sf_config(struct brw_context *brw,
100                     const struct brw_blorp_params *params)
101{
102   const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
103
104   /* 3DSTATE_SF
105    *
106    * Disable ViewportTransformEnable (dw2.1)
107    *
108    * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
109    * Primitives Overview":
110    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
111    *     use of screen- space coordinates).
112    *
113    * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
114    * and BackFaceFillMode (dw2.5:6) to SOLID(0).
115    *
116    * From the Sandy Bridge PRM, Volume 2, Part 1, Section
117    * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
118    *     SOLID: Any triangle or rectangle object found to be front-facing
119    *     is rendered as a solid object. This setting is required when
120    *     (rendering rectangle (RECTLIST) objects.
121    */
122   blorp_emit(brw, GENX(3DSTATE_SF), sf) {
123      sf.FrontFaceFillMode = FILL_MODE_SOLID;
124      sf.BackFaceFillMode = FILL_MODE_SOLID;
125
126      sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
127         MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
128
129      sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
130      if (prog_data) {
131         sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
132         sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
133         sf.ConstantInterpolationEnable = prog_data->flat_inputs;
134      } else {
135         sf.NumberofSFOutputAttributes = 0;
136         sf.VertexURBEntryReadLength = 1;
137      }
138   }
139}
140
141static void
142blorp_emit_wm_config(struct brw_context *brw,
143                     const struct brw_blorp_params *params)
144{
145   const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
146
147   /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
148    * nonzero to prevent the GPU from hanging.  While the documentation doesn't
149    * mention this explicitly, it notes that the valid range for the field is
150    * [1,39] = [2,40] threads, which excludes zero.
151    *
152    * To be safe (and to minimize extraneous code) we go ahead and fully
153    * configure the WM state whether or not there is a WM program.
154    */
155   blorp_emit(brw, GENX(3DSTATE_WM), wm) {
156      wm.MaximumNumberofThreads = brw->max_wm_threads - 1;
157
158      switch (params->hiz_op) {
159      case GEN6_HIZ_OP_DEPTH_CLEAR:
160         wm.DepthBufferClear = true;
161         break;
162      case GEN6_HIZ_OP_DEPTH_RESOLVE:
163         wm.DepthBufferResolveEnable = true;
164         break;
165      case GEN6_HIZ_OP_HIZ_RESOLVE:
166         wm.HierarchicalDepthBufferResolveEnable = true;
167         break;
168      case GEN6_HIZ_OP_NONE:
169         break;
170      default:
171         unreachable("not reached");
172      }
173
174      if (prog_data) {
175         wm.ThreadDispatchEnable = true;
176
177         wm.DispatchGRFStartRegisterforConstantSetupData0 =
178            prog_data->first_curbe_grf_0;
179         wm.DispatchGRFStartRegisterforConstantSetupData2 =
180            prog_data->first_curbe_grf_2;
181
182         wm.KernelStartPointer0 = params->wm_prog_kernel;
183         wm.KernelStartPointer2 =
184            params->wm_prog_kernel + prog_data->ksp_offset_2;
185
186         wm._8PixelDispatchEnable = prog_data->dispatch_8;
187         wm._16PixelDispatchEnable = prog_data->dispatch_16;
188
189         wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
190      }
191
192      if (params->src.bo) {
193         wm.SamplerCount = 1; /* Up to 4 samplers */
194         wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */
195      }
196
197      if (params->dst.surf.samples > 1) {
198         wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
199         wm.MultisampleDispatchMode =
200            (prog_data && prog_data->persample_msaa_dispatch) ?
201            MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
202      } else {
203         wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
204         wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
205      }
206   }
207}
208
209
210static void
211blorp_emit_depth_stencil_config(struct brw_context *brw,
212                                const struct brw_blorp_params *params)
213{
214   brw_emit_depth_stall_flushes(brw);
215
216   blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) {
217      switch (params->depth.surf.dim) {
218      case ISL_SURF_DIM_1D:
219         db.SurfaceType = SURFTYPE_1D;
220         break;
221      case ISL_SURF_DIM_2D:
222         db.SurfaceType = SURFTYPE_2D;
223         break;
224      case ISL_SURF_DIM_3D:
225         db.SurfaceType = SURFTYPE_3D;
226         break;
227      }
228
229      db.SurfaceFormat = params->depth_format;
230
231      db.TiledSurface = true;
232      db.TileWalk = TILEWALK_YMAJOR;
233      db.MIPMapLayoutMode = MIPLAYOUT_BELOW;
234
235      db.HierarchicalDepthBufferEnable = true;
236      db.SeparateStencilBufferEnable = true;
237
238      db.Width = params->depth.surf.logical_level0_px.width - 1;
239      db.Height = params->depth.surf.logical_level0_px.height - 1;
240      db.RenderTargetViewExtent = db.Depth =
241         MAX2(params->depth.surf.logical_level0_px.depth,
242              params->depth.surf.logical_level0_px.array_len) - 1;
243
244      db.LOD = params->depth.view.base_level;
245      db.MinimumArrayElement = params->depth.view.base_array_layer;
246
247      db.SurfacePitch = params->depth.surf.row_pitch - 1;
248      db.SurfaceBaseAddress = (struct blorp_address) {
249         .buffer = params->depth.bo,
250         .read_domains = I915_GEM_DOMAIN_RENDER,
251         .write_domain = I915_GEM_DOMAIN_RENDER,
252         .offset = params->depth.offset,
253      };
254   }
255
256   blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) {
257      hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1;
258      hiz.SurfaceBaseAddress = (struct blorp_address) {
259         .buffer = params->depth.aux_bo,
260         .read_domains = I915_GEM_DOMAIN_RENDER,
261         .write_domain = I915_GEM_DOMAIN_RENDER,
262         .offset = params->depth.aux_offset,
263      };
264   }
265
266   blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb);
267}
268
269static uint32_t
270blorp_emit_blend_state(struct brw_context *brw,
271                       const struct brw_blorp_params *params)
272{
273   struct GENX(BLEND_STATE) blend;
274   memset(&blend, 0, sizeof(blend));
275
276   for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
277      blend.Entry[i].PreBlendColorClampEnable = true;
278      blend.Entry[i].PostBlendColorClampEnable = true;
279      blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT;
280
281      blend.Entry[i].WriteDisableRed = params->color_write_disable[0];
282      blend.Entry[i].WriteDisableGreen = params->color_write_disable[1];
283      blend.Entry[i].WriteDisableBlue = params->color_write_disable[2];
284      blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3];
285   }
286
287   uint32_t offset;
288   void *state = brw_state_batch(brw, AUB_TRACE_BLEND_STATE,
289                                 GENX(BLEND_STATE_length) * 4, 64, &offset);
290   GENX(BLEND_STATE_pack)(NULL, state, &blend);
291
292   return offset;
293}
294
295static uint32_t
296blorp_emit_color_calc_state(struct brw_context *brw,
297                            const struct brw_blorp_params *params)
298{
299   uint32_t offset;
300   void *state = brw_state_batch(brw, AUB_TRACE_CC_STATE,
301                                 GENX(COLOR_CALC_STATE_length) * 4, 64, &offset);
302   memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4);
303
304   return offset;
305}
306
307static uint32_t
308blorp_emit_depth_stencil_state(struct brw_context *brw,
309                               const struct brw_blorp_params *params)
310{
311   /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
312    *   - 7.5.3.1 Depth Buffer Clear
313    *   - 7.5.3.2 Depth Buffer Resolve
314    *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
315    */
316   struct GENX(DEPTH_STENCIL_STATE) ds = {
317      .DepthBufferWriteEnable = true,
318   };
319
320   if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
321      ds.DepthTestEnable = true;
322      ds.DepthTestFunction = COMPAREFUNCTION_NEVER;
323   }
324
325   uint32_t offset;
326   void *state = brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE,
327                                 GENX(DEPTH_STENCIL_STATE_length) * 4, 64,
328                                 &offset);
329   GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds);
330
331   return offset;
332}
333
334static void
335blorp_emit_surface_states(struct brw_context *brw,
336                          const struct brw_blorp_params *params)
337{
338   uint32_t bind_offset;
339   uint32_t *bind =
340      brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
341                      sizeof(uint32_t) * BRW_BLORP_NUM_BINDING_TABLE_ENTRIES,
342                      32, /* alignment */ &bind_offset);
343
344   bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] =
345      brw_blorp_emit_surface_state(brw, &params->dst,
346                                   I915_GEM_DOMAIN_RENDER,
347                                   I915_GEM_DOMAIN_RENDER, true);
348   if (params->src.bo) {
349      bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] =
350         brw_blorp_emit_surface_state(brw, &params->src,
351                                      I915_GEM_DOMAIN_SAMPLER, 0, false);
352   }
353
354   blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {
355      bt.PSBindingTableChange = true;
356      bt.PointertoPSBindingTable = bind_offset;
357   }
358}
359
360static void
361blorp_emit_sampler_state(struct brw_context *brw,
362                         const struct brw_blorp_params *params)
363{
364   struct GENX(SAMPLER_STATE) sampler = {
365      .MipModeFilter = MIPFILTER_NONE,
366      .MagModeFilter = MAPFILTER_LINEAR,
367      .MinModeFilter = MAPFILTER_LINEAR,
368      .MinLOD = 0,
369      .MaxLOD = 0,
370      .TCXAddressControlMode = TCM_CLAMP,
371      .TCYAddressControlMode = TCM_CLAMP,
372      .TCZAddressControlMode = TCM_CLAMP,
373      .MaximumAnisotropy = RATIO21,
374      .RAddressMinFilterRoundingEnable = true,
375      .RAddressMagFilterRoundingEnable = true,
376      .VAddressMinFilterRoundingEnable = true,
377      .VAddressMagFilterRoundingEnable = true,
378      .UAddressMinFilterRoundingEnable = true,
379      .UAddressMagFilterRoundingEnable = true,
380      .NonnormalizedCoordinateEnable = true,
381   };
382
383   uint32_t offset;
384   void *state = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
385                                 GENX(SAMPLER_STATE_length) * 4, 32, &offset);
386   GENX(SAMPLER_STATE_pack)(NULL, state, &sampler);
387
388   blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) {
389      ssp.VSSamplerStateChange = true;
390      ssp.GSSamplerStateChange = true;
391      ssp.PSSamplerStateChange = true;
392      ssp.PointertoPSSamplerState = offset;
393   }
394}
395
396/* 3DSTATE_VIEWPORT_STATE_POINTERS */
397static void
398blorp_emit_viewport_state(struct brw_context *brw,
399                          const struct brw_blorp_params *params)
400{
401   uint32_t cc_vp_offset;
402
403   void *state = brw_state_batch(brw, AUB_TRACE_CC_VP_STATE,
404                                 GENX(CC_VIEWPORT_length) * 4, 32,
405                                 &cc_vp_offset);
406
407   GENX(CC_VIEWPORT_pack)(brw, state,
408      &(struct GENX(CC_VIEWPORT)) {
409         .MinimumDepth = 0.0,
410         .MaximumDepth = 1.0,
411      });
412
413   blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) {
414      vsp.CCViewportStateChange = true;
415      vsp.PointertoCC_VIEWPORT = cc_vp_offset;
416   }
417}
418
419
420/**
421 * \brief Execute a blit or render pass operation.
422 *
423 * To execute the operation, this function manually constructs and emits a
424 * batch to draw a rectangle primitive. The batchbuffer is flushed before
425 * constructing and after emitting the batch.
426 *
427 * This function alters no GL state.
428 */
429void
430genX(blorp_exec)(struct brw_context *brw,
431                 const struct brw_blorp_params *params)
432{
433   uint32_t blend_state_offset = 0;
434   uint32_t color_calc_state_offset = 0;
435   uint32_t depth_stencil_state_offset;
436
437   /* Emit workaround flushes when we switch from drawing to blorping. */
438   brw_emit_post_sync_nonzero_flush(brw);
439
440   brw_upload_state_base_address(brw);
441
442   gen6_blorp_emit_vertices(brw, params);
443
444   /* 3DSTATE_URB
445    *
446    * Assign the entire URB to the VS. Even though the VS disabled, URB space
447    * is still needed because the clipper loads the VUE's from the URB. From
448    * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
449    * Dword 1.15:0 "VS Number of URB Entries":
450    *     This field is always used (even if VS Function Enable is DISABLED).
451    *
452    * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
453    * safely ignore it because this batch contains only one draw call.
454    *     Because of URB corruption caused by allocating a previous GS unit
455    *     URB entry to the VS unit, software is required to send a “GS NULL
456    *     Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
457    *     plus a dummy DRAW call before any case where VS will be taking over
458    *     GS URB space.
459    */
460   blorp_emit(brw, GENX(3DSTATE_URB), urb) {
461      urb.VSNumberofURBEntries = brw->urb.max_vs_entries;
462   }
463
464   if (params->wm_prog_data) {
465      blend_state_offset = blorp_emit_blend_state(brw, params);
466      color_calc_state_offset = blorp_emit_color_calc_state(brw, params);
467   }
468   depth_stencil_state_offset = blorp_emit_depth_stencil_state(brw, params);
469
470   /* 3DSTATE_CC_STATE_POINTERS
471    *
472    * The pointer offsets are relative to
473    * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
474    *
475    * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
476    */
477   blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), cc) {
478      cc.BLEND_STATEChange = true;
479      cc.COLOR_CALC_STATEChange = true;
480      cc.DEPTH_STENCIL_STATEChange = true;
481      cc.PointertoBLEND_STATE = blend_state_offset;
482      cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset;
483      cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset;
484   }
485
486   blorp_emit(brw, GENX(3DSTATE_CONSTANT_VS), vs);
487   blorp_emit(brw, GENX(3DSTATE_CONSTANT_GS), gs);
488   blorp_emit(brw, GENX(3DSTATE_CONSTANT_PS), ps);
489
490   if (params->wm_prog_data)
491      blorp_emit_surface_states(brw, params);
492
493   if (params->src.bo)
494      blorp_emit_sampler_state(brw, params);
495
496   gen6_emit_3dstate_multisample(brw, params->dst.surf.samples);
497
498   blorp_emit(brw, GENX(3DSTATE_SAMPLE_MASK), mask) {
499      mask.SampleMask = (1 << params->dst.surf.samples) - 1;
500   }
501
502   /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
503    * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
504    *
505    *   [DevSNB] A pipeline flush must be programmed prior to a
506    *   3DSTATE_VS command that causes the VS Function Enable to
507    *   toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
508    *   command with CS stall bit set and a post sync operation.
509    *
510    * We've already done one at the start of the BLORP operation.
511    */
512   blorp_emit(brw, GENX(3DSTATE_VS), vs);
513   blorp_emit(brw, GENX(3DSTATE_GS), gs);
514
515   blorp_emit(brw, GENX(3DSTATE_CLIP), clip) {
516      clip.PerspectiveDivideDisable = true;
517   }
518
519   blorp_emit_sf_config(brw, params);
520   blorp_emit_wm_config(brw, params);
521
522   blorp_emit_viewport_state(brw, params);
523
524   if (params->depth.bo) {
525      blorp_emit_depth_stencil_config(brw, params);
526   } else {
527      brw_emit_depth_stall_flushes(brw);
528
529      blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) {
530         db.SurfaceType = SURFTYPE_NULL;
531         db.SurfaceFormat = D32_FLOAT;
532      }
533      blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz);
534      blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb);
535   }
536
537   /* 3DSTATE_CLEAR_PARAMS
538    *
539    * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
540    *   [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
541    *   packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
542    */
543   blorp_emit(brw, GENX(3DSTATE_CLEAR_PARAMS), clear) {
544      clear.DepthClearValueValid = true;
545      clear.DepthClearValue = params->depth.clear_color.u32[0];
546   }
547
548   blorp_emit(brw, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
549      rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
550      rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
551   }
552
553   blorp_emit(brw, GENX(3DPRIMITIVE), prim) {
554      prim.VertexAccessType = SEQUENTIAL;
555      prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
556      prim.VertexCountPerInstance = 3;
557      prim.InstanceCount = params->num_layers;
558   }
559}
560