genX_blorp_exec.c revision ac08bc8ac220f22333536a9f881fde1e5607148e
1/*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25
26#include "intel_batchbuffer.h"
27#include "intel_mipmap_tree.h"
28
29#include "brw_context.h"
30#include "brw_state.h"
31
32#include "blorp_priv.h"
33
34#include "genxml/gen_macros.h"
35
36static void *
37blorp_emit_dwords(struct brw_context *brw, unsigned n)
38{
39   intel_batchbuffer_begin(brw, n, RENDER_RING);
40   uint32_t *map = brw->batch.map_next;
41   brw->batch.map_next += n;
42   intel_batchbuffer_advance(brw);
43   return map;
44}
45
46static uint64_t
47blorp_emit_reloc(struct brw_context *brw, void *location,
48                 struct blorp_address address, uint32_t delta)
49{
50   uint32_t offset = (char *)location - (char *)brw->batch.map;
51   if (brw->gen >= 8) {
52      return intel_batchbuffer_reloc64(brw, address.buffer, offset,
53                                       address.read_domains,
54                                       address.write_domain,
55                                       address.offset + delta);
56   } else {
57      return intel_batchbuffer_reloc(brw, address.buffer, offset,
58                                     address.read_domains,
59                                     address.write_domain,
60                                     address.offset + delta);
61   }
62}
63
64static void *
65blorp_alloc_dynamic_state(struct blorp_context *blorp,
66                          enum aub_state_struct_type type,
67                          uint32_t size,
68                          uint32_t alignment,
69                          uint32_t *offset)
70{
71   struct brw_context *brw = blorp->driver_ctx;
72   return brw_state_batch(brw, type, size, alignment, offset);
73}
74
75static void *
76blorp_alloc_vertex_buffer(struct blorp_context *blorp, uint32_t size,
77                          struct blorp_address *addr)
78{
79   struct brw_context *brw = blorp->driver_ctx;
80
81   uint32_t offset;
82   void *data = brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER,
83                                size, 32, &offset);
84
85   *addr = (struct blorp_address) {
86      .buffer = brw->batch.bo,
87      .read_domains = I915_GEM_DOMAIN_VERTEX,
88      .write_domain = 0,
89      .offset = offset,
90   };
91
92   return data;
93}
94
/**
 * Program the URB so the VS gets entries of at least \p vs_entry_size.
 *
 * On gen7+ the upload is skipped when neither BRW_NEW_CONTEXT nor
 * BRW_NEW_URB_SIZE is flagged and the current VS entry size is already large
 * enough; otherwise BRW_NEW_URB_SIZE is flagged and the URB is re-uploaded.
 * On gen6 the URB is always re-uploaded.
 */
static void
blorp_emit_urb_config(struct brw_context *brw, unsigned vs_entry_size)
{
#if GEN_GEN >= 7
   const bool urb_dirty =
      (brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) != 0;

   if (urb_dirty || brw->urb.vsize < vs_entry_size) {
      brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
      gen7_upload_urb(brw, vs_entry_size, false, false);
   }
#else
   gen6_upload_urb(brw, vs_entry_size, false, 0);
#endif
}
110
/**
 * Emit 3DSTATE_MULTISAMPLE for \p samples using the generation-appropriate
 * driver helper (gen8 helper on gen8+, gen6 helper otherwise).
 */
static void
blorp_emit_3dstate_multisample(struct brw_context *brw, unsigned samples)
{
#if GEN_GEN < 8
   gen6_emit_3dstate_multisample(brw, samples);
#else
   gen8_emit_3dstate_multisample(brw, samples);
#endif
}
120
121#define __gen_address_type struct blorp_address
122#define __gen_user_data struct brw_context
123
124static uint64_t
125__gen_combine_address(struct brw_context *brw, void *location,
126                      struct blorp_address address, uint32_t delta)
127{
128   if (address.buffer == NULL) {
129      return address.offset + delta;
130   } else {
131      return blorp_emit_reloc(brw, location, address, delta);
132   }
133}
134
135#include "genxml/genX_pack.h"
136
/* Token-pasting helpers that map a command name onto the identifiers
 * referenced for it by the emit macros below: its length, length bias,
 * header initializer and pack function.
 */
#define _blorp_cmd_length(cmd) cmd ## _length
#define _blorp_cmd_length_bias(cmd) cmd ## _length_bias
#define _blorp_cmd_header(cmd) cmd ## _header
#define _blorp_cmd_pack(cmd) cmd ## _pack

/* Emit a fixed-length command.
 *
 * Usage:
 *    blorp_emit(brw, GENX(SOME_CMD), name) { name.Field = ...; }
 *
 * The for-loop declares `name` pre-initialized with the command header and
 * reserves space in the batch.  The statement that follows the macro runs
 * exactly once; afterwards the struct is packed into the reserved space and
 * _dst is cleared, which terminates the loop.  Follow the macro with a bare
 * `;` to emit the command with only its header defaults.
 */
#define blorp_emit(brw, cmd, name)                                \
   for (struct cmd name = { _blorp_cmd_header(cmd) },             \
        *_dst = blorp_emit_dwords(brw, _blorp_cmd_length(cmd));   \
        __builtin_expect(_dst != NULL, 1);                        \
        _blorp_cmd_pack(cmd)(brw, (void *)_dst, &name),           \
        _dst = NULL)

/* Emit the header of a variable-length command that is `n` dwords long in
 * total and evaluate to a pointer to the dword following the header, where
 * the caller packs the command's array payload.
 *
 * `n` is evaluated exactly once and is parenthesized, so any integer
 * expression may be passed safely.
 */
#define blorp_emitn(batch, cmd, n) ({                             \
      const unsigned _blorp_n = (n);                              \
      uint32_t *_dw = blorp_emit_dwords(batch, _blorp_n);         \
      struct cmd template = {                                     \
         _blorp_cmd_header(cmd),                                  \
         .DWordLength = _blorp_n - _blorp_cmd_length_bias(cmd),   \
      };                                                          \
      _blorp_cmd_pack(cmd)(batch, _dw, &template);                \
      _dw + 1; /* Array starts at dw[1] */                        \
   })
158
159/* Once vertex fetcher has written full VUE entries with complete
160 * header the space requirement is as follows per vertex (in bytes):
161 *
162 *     Header    Position    Program constants
163 *   +--------+------------+-------------------+
164 *   |   16   |     16     |      n x 16       |
165 *   +--------+------------+-------------------+
166 *
167 * where 'n' stands for number of varying inputs expressed as vec4s.
168 *
169 * The URB size is in turn expressed in 64 bytes (512 bits).
170 */
171static inline unsigned
172gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params)
173{
174    const unsigned num_varyings =
175       params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
176    const unsigned total_needed = 16 + 16 + num_varyings * 16;
177
178   return DIV_ROUND_UP(total_needed, 64);
179}
180
/* 3DSTATE_URB
 * 3DSTATE_URB_VS
 * 3DSTATE_URB_HS
 * 3DSTATE_URB_DS
 * 3DSTATE_URB_GS
 *
 * Assign the entire URB to the VS. Even though the VS is disabled, URB space
 * is still needed because the clipper loads the VUEs from the URB. From
 * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
 * Dword 1.15:0 "VS Number of URB Entries":
 *     This field is always used (even if VS Function Enable is DISABLED).
 *
 * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
 * safely ignore it because this batch contains only one draw call.
 *     Because of URB corruption caused by allocating a previous GS unit
 *     URB entry to the VS unit, software is required to send a “GS NULL
 *     Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
 *     plus a dummy DRAW call before any case where VS will be taking over
 *     GS URB space.
 *
 * If 3DSTATE_URB_VS is emitted, then the others must be emitted as well.
 * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS:
 *
 *     3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
 *     programmed in order for the programming of this state to be
 *     valid.
 */
static void
emit_urb_config(struct brw_context *brw,
                const struct brw_blorp_params *params)
{
   const unsigned vs_entry_size = gen7_blorp_get_vs_entry_size(params);

   blorp_emit_urb_config(brw, vs_entry_size);
}
214
215static void
216blorp_emit_vertex_data(struct brw_context *brw,
217                       const struct brw_blorp_params *params,
218                       struct blorp_address *addr,
219                       uint32_t *size)
220{
221   const float vertices[] = {
222      /* v0 */ (float)params->x0, (float)params->y1,
223      /* v1 */ (float)params->x1, (float)params->y1,
224      /* v2 */ (float)params->x0, (float)params->y0,
225   };
226
227   void *data = blorp_alloc_vertex_buffer(&brw->blorp, sizeof(vertices), addr);
228   memcpy(data, vertices, sizeof(vertices));
229   *size = sizeof(vertices);
230}
231
232static void
233blorp_emit_input_varying_data(struct brw_context *brw,
234                              const struct brw_blorp_params *params,
235                              struct blorp_address *addr,
236                              uint32_t *size)
237{
238   const unsigned vec4_size_in_bytes = 4 * sizeof(float);
239   const unsigned max_num_varyings =
240      DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes);
241   const unsigned num_varyings = params->wm_prog_data->num_varying_inputs;
242
243   *size = num_varyings * vec4_size_in_bytes;
244
245   const float *const inputs_src = (const float *)&params->wm_inputs;
246   float *inputs = blorp_alloc_vertex_buffer(&brw->blorp, *size, addr);
247
248   /* Walk over the attribute slots, determine if the attribute is used by
249    * the program and when necessary copy the values from the input storage to
250    * the vertex data buffer.
251    */
252   for (unsigned i = 0; i < max_num_varyings; i++) {
253      const gl_varying_slot attr = VARYING_SLOT_VAR0 + i;
254
255      if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr)))
256         continue;
257
258      memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes);
259
260      inputs += 4;
261   }
262}
263
264static void
265blorp_emit_vertex_buffers(struct brw_context *brw,
266                          const struct brw_blorp_params *params)
267{
268   struct GENX(VERTEX_BUFFER_STATE) vb[2];
269   memset(vb, 0, sizeof(vb));
270
271   unsigned num_buffers = 1;
272
273   uint32_t size;
274   blorp_emit_vertex_data(brw, params, &vb[0].BufferStartingAddress, &size);
275   vb[0].VertexBufferIndex = 0;
276   vb[0].BufferPitch = 2 * sizeof(float);
277   vb[0].VertexBufferMOCS = brw->blorp.mocs.vb;
278#if GEN_GEN >= 7
279   vb[0].AddressModifyEnable = true;
280#endif
281#if GEN_GEN >= 8
282   vb[0].BufferSize = size;
283#else
284   vb[0].BufferAccessType = VERTEXDATA;
285   vb[0].EndAddress = vb[0].BufferStartingAddress;
286   vb[0].EndAddress.offset += size - 1;
287#endif
288
289   if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) {
290      blorp_emit_input_varying_data(brw, params,
291                                    &vb[1].BufferStartingAddress, &size);
292      vb[1].VertexBufferIndex = 1;
293      vb[1].BufferPitch = 0;
294      vb[1].VertexBufferMOCS = brw->blorp.mocs.vb;
295#if GEN_GEN >= 7
296      vb[1].AddressModifyEnable = true;
297#endif
298#if GEN_GEN >= 8
299      vb[1].BufferSize = size;
300#else
301      vb[1].BufferAccessType = INSTANCEDATA;
302      vb[1].EndAddress = vb[1].BufferStartingAddress;
303      vb[1].EndAddress.offset += size - 1;
304#endif
305      num_buffers++;
306   }
307
308   const unsigned num_dwords =
309      1 + GENX(VERTEX_BUFFER_STATE_length) * num_buffers;
310   uint32_t *dw = blorp_emitn(brw, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords);
311
312   for (unsigned i = 0; i < num_buffers; i++) {
313      GENX(VERTEX_BUFFER_STATE_pack)(brw, dw, &vb[i]);
314      dw += GENX(VERTEX_BUFFER_STATE_length);
315   }
316}
317
318static void
319blorp_emit_vertex_elements(struct brw_context *brw,
320                           const struct brw_blorp_params *params)
321{
322   const unsigned num_varyings =
323      params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
324   const unsigned num_elements = 2 + num_varyings;
325
326   struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements];
327   memset(ve, 0, num_elements * sizeof(*ve));
328
329   /* Setup VBO for the rectangle primitive..
330    *
331    * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
332    * vertices. The vertices reside in screen space with DirectX
333    * coordinates (that is, (0, 0) is the upper left corner).
334    *
335    *   v2 ------ implied
336    *    |        |
337    *    |        |
338    *   v0 ----- v1
339    *
340    * Since the VS is disabled, the clipper loads each VUE directly from
341    * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
342    * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
343    *   dw0: Reserved, MBZ.
344    *   dw1: Render Target Array Index. The HiZ op does not use indexed
345    *        vertices, so set the dword to 0.
346    *   dw2: Viewport Index. The HiZ op disables viewport mapping and
347    *        scissoring, so set the dword to 0.
348    *   dw3: Point Width: The HiZ op does not emit the POINTLIST primitive,
349    *        so set the dword to 0.
350    *   dw4: Vertex Position X.
351    *   dw5: Vertex Position Y.
352    *   dw6: Vertex Position Z.
353    *   dw7: Vertex Position W.
354    *
355    *   dw8: Flat vertex input 0
356    *   dw9: Flat vertex input 1
357    *   ...
358    *   dwn: Flat vertex input n - 8
359    *
360    * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
361    * "Vertex URB Entry (VUE) Formats".
362    *
363    * Only vertex position X and Y are going to be variable, Z is fixed to
364    * zero and W to one. Header words dw0-3 are all zero. There is no need to
365    * include the fixed values in the vertex buffer. Vertex fetcher can be
366    * instructed to fill vertex elements with constant values of one and zero
367    * instead of reading them from the buffer.
368    * Flat inputs are program constants that are not interpolated. Moreover
369    * their values will be the same between vertices.
370    *
371    * See the vertex element setup below.
372    */
373   ve[0].VertexBufferIndex = 0;
374   ve[0].Valid = true;
375   ve[0].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
376   ve[0].SourceElementOffset = 0;
377   ve[0].Component0Control = VFCOMP_STORE_0;
378   ve[0].Component1Control = VFCOMP_STORE_0;
379   ve[0].Component2Control = VFCOMP_STORE_0;
380   ve[0].Component3Control = VFCOMP_STORE_0;
381
382   ve[1].VertexBufferIndex = 0;
383   ve[1].Valid = true;
384   ve[1].SourceElementFormat = ISL_FORMAT_R32G32_FLOAT;
385   ve[1].SourceElementOffset = 0;
386   ve[1].Component0Control = VFCOMP_STORE_SRC;
387   ve[1].Component1Control = VFCOMP_STORE_SRC;
388   ve[1].Component2Control = VFCOMP_STORE_0;
389   ve[1].Component3Control = VFCOMP_STORE_1_FP;
390
391   for (unsigned i = 0; i < num_varyings; ++i) {
392      ve[i + 2].VertexBufferIndex = 1;
393      ve[i + 2].Valid = true;
394      ve[i + 2].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
395      ve[i + 2].SourceElementOffset = i * 4 * sizeof(float);
396      ve[i + 2].Component0Control = VFCOMP_STORE_SRC;
397      ve[i + 2].Component1Control = VFCOMP_STORE_SRC;
398      ve[i + 2].Component2Control = VFCOMP_STORE_SRC;
399      ve[i + 2].Component3Control = VFCOMP_STORE_SRC;
400   }
401
402   const unsigned num_dwords =
403      1 + GENX(VERTEX_ELEMENT_STATE_length) * num_elements;
404   uint32_t *dw = blorp_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS), num_dwords);
405
406   for (unsigned i = 0; i < num_elements; i++) {
407      GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &ve[i]);
408      dw += GENX(VERTEX_ELEMENT_STATE_length);
409   }
410
411#if GEN_GEN >= 8
412   blorp_emit(brw, GENX(3DSTATE_VF_SGVS), sgvs);
413
414   for (unsigned i = 0; i < num_elements; i++) {
415      blorp_emit(brw, GENX(3DSTATE_VF_INSTANCING), vf) {
416         vf.VertexElementIndex = i;
417         vf.InstancingEnable = false;
418      }
419   }
420
421   blorp_emit(brw, GENX(3DSTATE_VF_TOPOLOGY), topo) {
422      topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
423   }
424#endif
425}
426
427static void
428blorp_emit_sf_config(struct brw_context *brw,
429                     const struct brw_blorp_params *params)
430{
431   const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
432
433   /* 3DSTATE_SF
434    *
435    * Disable ViewportTransformEnable (dw2.1)
436    *
437    * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
438    * Primitives Overview":
439    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
440    *     use of screen- space coordinates).
441    *
442    * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
443    * and BackFaceFillMode (dw2.5:6) to SOLID(0).
444    *
445    * From the Sandy Bridge PRM, Volume 2, Part 1, Section
446    * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
447    *     SOLID: Any triangle or rectangle object found to be front-facing
448    *     is rendered as a solid object. This setting is required when
449    *     (rendering rectangle (RECTLIST) objects.
450    */
451
452#if GEN_GEN >= 8
453
454   blorp_emit(brw, GENX(3DSTATE_SF), sf);
455
456   blorp_emit(brw, GENX(3DSTATE_RASTER), raster) {
457      raster.CullMode = CULLMODE_NONE;
458   }
459
460   blorp_emit(brw, GENX(3DSTATE_SBE), sbe) {
461      sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
462      sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
463      sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
464      sbe.ForceVertexURBEntryReadLength = true;
465      sbe.ForceVertexURBEntryReadOffset = true;
466      sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
467
468#if GEN_GEN >= 9
469      for (unsigned i = 0; i < 32; i++)
470         sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
471#endif
472   }
473
474#elif GEN_GEN >= 7
475
476   blorp_emit(brw, GENX(3DSTATE_SF), sf) {
477      sf.FrontFaceFillMode = FILL_MODE_SOLID;
478      sf.BackFaceFillMode = FILL_MODE_SOLID;
479
480      sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
481         MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
482
483#if GEN_GEN == 7
484      sf.DepthBufferSurfaceFormat = params->depth_format;
485#endif
486   }
487
488   blorp_emit(brw, GENX(3DSTATE_SBE), sbe) {
489      sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
490      if (prog_data) {
491         sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
492         sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
493         sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
494      } else {
495         sbe.NumberofSFOutputAttributes = 0;
496         sbe.VertexURBEntryReadLength = 1;
497      }
498   }
499
500#else /* GEN_GEN <= 6 */
501
502   blorp_emit(brw, GENX(3DSTATE_SF), sf) {
503      sf.FrontFaceFillMode = FILL_MODE_SOLID;
504      sf.BackFaceFillMode = FILL_MODE_SOLID;
505
506      sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
507         MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
508
509      sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
510      if (prog_data) {
511         sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
512         sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
513         sf.ConstantInterpolationEnable = prog_data->flat_inputs;
514      } else {
515         sf.NumberofSFOutputAttributes = 0;
516         sf.VertexURBEntryReadLength = 1;
517      }
518   }
519
520#endif /* GEN_GEN */
521}
522
523static void
524blorp_emit_ps_config(struct brw_context *brw,
525                     const struct brw_blorp_params *params)
526{
527   const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
528
529   /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
530    * nonzero to prevent the GPU from hanging.  While the documentation doesn't
531    * mention this explicitly, it notes that the valid range for the field is
532    * [1,39] = [2,40] threads, which excludes zero.
533    *
534    * To be safe (and to minimize extraneous code) we go ahead and fully
535    * configure the WM state whether or not there is a WM program.
536    */
537
538#if GEN_GEN >= 8
539
540   blorp_emit(brw, GENX(3DSTATE_WM), wm);
541
542   blorp_emit(brw, GENX(3DSTATE_PS), ps) {
543      if (params->src.addr.buffer) {
544         ps.SamplerCount = 1; /* Up to 4 samplers */
545         ps.BindingTableEntryCount = 2;
546      } else {
547         ps.BindingTableEntryCount = 1;
548      }
549
550      ps.DispatchGRFStartRegisterForConstantSetupData0 =
551         prog_data->first_curbe_grf_0;
552      ps.DispatchGRFStartRegisterForConstantSetupData2 =
553         prog_data->first_curbe_grf_2;
554
555      ps._8PixelDispatchEnable = prog_data->dispatch_8;
556      ps._16PixelDispatchEnable = prog_data->dispatch_16;
557
558      ps.KernelStartPointer0 = params->wm_prog_kernel;
559      ps.KernelStartPointer2 =
560         params->wm_prog_kernel + prog_data->ksp_offset_2;
561
562      /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
563       * it implicitly scales for different GT levels (which have some # of
564       * PSDs).
565       *
566       * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
567       */
568      if (GEN_GEN >= 9)
569         ps.MaximumNumberofThreadsPerPSD = 64 - 1;
570      else
571         ps.MaximumNumberofThreadsPerPSD = 64 - 2;
572
573      switch (params->fast_clear_op) {
574#if GEN_GEN >= 9
575      case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
576         ps.RenderTargetResolveType = RESOLVE_PARTIAL;
577         break;
578      case (3 << 6): /* GEN9_PS_RENDER_TARGET_RESOLVE_FULL */
579         ps.RenderTargetResolveType = RESOLVE_FULL;
580         break;
581#else
582      case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
583         ps.RenderTargetResolveEnable = true;
584         break;
585#endif
586      case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */
587         ps.RenderTargetFastClearEnable = true;
588         break;
589      }
590   }
591
592   blorp_emit(brw, GENX(3DSTATE_PS_EXTRA), psx) {
593      psx.PixelShaderValid = true;
594
595      if (params->src.addr.buffer)
596         psx.PixelShaderKillsPixel = true;
597
598      psx.AttributeEnable = prog_data->num_varying_inputs > 0;
599
600      if (prog_data && prog_data->persample_msaa_dispatch)
601         psx.PixelShaderIsPerSample = true;
602   }
603
604#elif GEN_GEN >= 7
605
606   blorp_emit(brw, GENX(3DSTATE_WM), wm) {
607      switch (params->hiz_op) {
608      case GEN6_HIZ_OP_DEPTH_CLEAR:
609         wm.DepthBufferClear = true;
610         break;
611      case GEN6_HIZ_OP_DEPTH_RESOLVE:
612         wm.DepthBufferResolveEnable = true;
613         break;
614      case GEN6_HIZ_OP_HIZ_RESOLVE:
615         wm.HierarchicalDepthBufferResolveEnable = true;
616         break;
617      case GEN6_HIZ_OP_NONE:
618         break;
619      default:
620         unreachable("not reached");
621      }
622
623      if (prog_data)
624         wm.ThreadDispatchEnable = true;
625
626      if (params->src.addr.buffer)
627         wm.PixelShaderKillPixel = true;
628
629      if (params->dst.surf.samples > 1) {
630         wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
631         wm.MultisampleDispatchMode =
632            (prog_data && prog_data->persample_msaa_dispatch) ?
633            MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
634      } else {
635         wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
636         wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
637      }
638   }
639
640   blorp_emit(brw, GENX(3DSTATE_PS), ps) {
641      ps.MaximumNumberofThreads = brw->max_wm_threads - 1;
642
643#if GEN_IS_HASWELL
644      ps.SampleMask = 1;
645#endif
646
647      if (prog_data) {
648         ps.DispatchGRFStartRegisterforConstantSetupData0 =
649            prog_data->first_curbe_grf_0;
650         ps.DispatchGRFStartRegisterforConstantSetupData2 =
651            prog_data->first_curbe_grf_2;
652
653         ps.KernelStartPointer0 = params->wm_prog_kernel;
654         ps.KernelStartPointer2 =
655            params->wm_prog_kernel + prog_data->ksp_offset_2;
656
657         ps._8PixelDispatchEnable = prog_data->dispatch_8;
658         ps._16PixelDispatchEnable = prog_data->dispatch_16;
659
660         ps.AttributeEnable = prog_data->num_varying_inputs > 0;
661      } else {
662         /* Gen7 hardware gets angry if we don't enable at least one dispatch
663          * mode, so just enable 16-pixel dispatch if we don't have a program.
664          */
665         ps._16PixelDispatchEnable = true;
666      }
667
668      if (params->src.addr.buffer)
669         ps.SamplerCount = 1; /* Up to 4 samplers */
670
671      switch (params->fast_clear_op) {
672      case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
673         ps.RenderTargetResolveEnable = true;
674         break;
675      case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */
676         ps.RenderTargetFastClearEnable = true;
677         break;
678      }
679   }
680
681#else /* GEN_GEN <= 6 */
682
683   blorp_emit(brw, GENX(3DSTATE_WM), wm) {
684      wm.MaximumNumberofThreads = brw->max_wm_threads - 1;
685
686      switch (params->hiz_op) {
687      case GEN6_HIZ_OP_DEPTH_CLEAR:
688         wm.DepthBufferClear = true;
689         break;
690      case GEN6_HIZ_OP_DEPTH_RESOLVE:
691         wm.DepthBufferResolveEnable = true;
692         break;
693      case GEN6_HIZ_OP_HIZ_RESOLVE:
694         wm.HierarchicalDepthBufferResolveEnable = true;
695         break;
696      case GEN6_HIZ_OP_NONE:
697         break;
698      default:
699         unreachable("not reached");
700      }
701
702      if (prog_data) {
703         wm.ThreadDispatchEnable = true;
704
705         wm.DispatchGRFStartRegisterforConstantSetupData0 =
706            prog_data->first_curbe_grf_0;
707         wm.DispatchGRFStartRegisterforConstantSetupData2 =
708            prog_data->first_curbe_grf_2;
709
710         wm.KernelStartPointer0 = params->wm_prog_kernel;
711         wm.KernelStartPointer2 =
712            params->wm_prog_kernel + prog_data->ksp_offset_2;
713
714         wm._8PixelDispatchEnable = prog_data->dispatch_8;
715         wm._16PixelDispatchEnable = prog_data->dispatch_16;
716
717         wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
718      }
719
720      if (params->src.addr.buffer) {
721         wm.SamplerCount = 1; /* Up to 4 samplers */
722         wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */
723      }
724
725      if (params->dst.surf.samples > 1) {
726         wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
727         wm.MultisampleDispatchMode =
728            (prog_data && prog_data->persample_msaa_dispatch) ?
729            MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
730      } else {
731         wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
732         wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
733      }
734   }
735
736#endif /* GEN_GEN */
737}
738
739
740static void
741blorp_emit_depth_stencil_config(struct brw_context *brw,
742                                const struct brw_blorp_params *params)
743{
744   brw_emit_depth_stall_flushes(brw);
745
746#if GEN_GEN >= 7
747   const uint32_t mocs = 1; /* GEN7_MOCS_L3 */
748#else
749   const uint32_t mocs = 0;
750#endif
751
752   blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) {
753      switch (params->depth.surf.dim) {
754      case ISL_SURF_DIM_1D:
755         db.SurfaceType = SURFTYPE_1D;
756         break;
757      case ISL_SURF_DIM_2D:
758         db.SurfaceType = SURFTYPE_2D;
759         break;
760      case ISL_SURF_DIM_3D:
761         db.SurfaceType = SURFTYPE_3D;
762         break;
763      }
764
765      db.SurfaceFormat = params->depth_format;
766
767#if GEN_GEN >= 7
768      db.DepthWriteEnable = true;
769#endif
770
771#if GEN_GEN <= 6
772      db.TiledSurface = true;
773      db.TileWalk = TILEWALK_YMAJOR;
774      db.MIPMapLayoutMode = MIPLAYOUT_BELOW;
775      db.SeparateStencilBufferEnable = true;
776#endif
777
778      db.HierarchicalDepthBufferEnable = true;
779
780      db.Width = params->depth.surf.logical_level0_px.width - 1;
781      db.Height = params->depth.surf.logical_level0_px.height - 1;
782      db.RenderTargetViewExtent = db.Depth =
783         MAX2(params->depth.surf.logical_level0_px.depth,
784              params->depth.surf.logical_level0_px.array_len) - 1;
785
786      db.LOD = params->depth.view.base_level;
787      db.MinimumArrayElement = params->depth.view.base_array_layer;
788
789      db.SurfacePitch = params->depth.surf.row_pitch - 1;
790      db.SurfaceBaseAddress = params->depth.addr;
791      db.DepthBufferMOCS = mocs;
792   }
793
794   blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) {
795      hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1;
796      hiz.SurfaceBaseAddress = params->depth.aux_addr;
797      hiz.HierarchicalDepthBufferMOCS = mocs;
798   }
799
800   blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb);
801}
802
803static uint32_t
804blorp_emit_blend_state(struct brw_context *brw,
805                       const struct brw_blorp_params *params)
806{
807   struct GENX(BLEND_STATE) blend;
808   memset(&blend, 0, sizeof(blend));
809
810   for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
811      blend.Entry[i].PreBlendColorClampEnable = true;
812      blend.Entry[i].PostBlendColorClampEnable = true;
813      blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT;
814
815      blend.Entry[i].WriteDisableRed = params->color_write_disable[0];
816      blend.Entry[i].WriteDisableGreen = params->color_write_disable[1];
817      blend.Entry[i].WriteDisableBlue = params->color_write_disable[2];
818      blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3];
819   }
820
821   uint32_t offset;
822   void *state = blorp_alloc_dynamic_state(&brw->blorp,
823                                           AUB_TRACE_BLEND_STATE,
824                                           GENX(BLEND_STATE_length) * 4,
825                                           64, &offset);
826   GENX(BLEND_STATE_pack)(NULL, state, &blend);
827
828#if GEN_GEN >= 7
829   blorp_emit(brw, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) {
830      sp.BlendStatePointer = offset;
831#if GEN_GEN >= 8
832      sp.BlendStatePointerValid = true;
833#endif
834   }
835#endif
836
837#if GEN_GEN >= 8
838   blorp_emit(brw, GENX(3DSTATE_PS_BLEND), ps_blend) {
839      ps_blend.HasWriteableRT = true;
840   }
841#endif
842
843   return offset;
844}
845
846static uint32_t
847blorp_emit_color_calc_state(struct brw_context *brw,
848                            const struct brw_blorp_params *params)
849{
850   uint32_t offset;
851   void *state = blorp_alloc_dynamic_state(&brw->blorp,
852                                           AUB_TRACE_CC_STATE,
853                                           GENX(COLOR_CALC_STATE_length) * 4,
854                                           64, &offset);
855   memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4);
856
857#if GEN_GEN >= 7
858   blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), sp) {
859      sp.ColorCalcStatePointer = offset;
860#if GEN_GEN >= 8
861      sp.ColorCalcStatePointerValid = true;
862#endif
863   }
864#endif
865
866   return offset;
867}
868
869static uint32_t
870blorp_emit_depth_stencil_state(struct brw_context *brw,
871                               const struct brw_blorp_params *params)
872{
873#if GEN_GEN >= 8
874
875   /* On gen8+, DEPTH_STENCIL state is simply an instruction */
876   blorp_emit(brw, GENX(3DSTATE_WM_DEPTH_STENCIL), ds);
877   return 0;
878
879#else /* GEN_GEN <= 7 */
880
881   /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
882    *   - 7.5.3.1 Depth Buffer Clear
883    *   - 7.5.3.2 Depth Buffer Resolve
884    *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
885    */
886   struct GENX(DEPTH_STENCIL_STATE) ds = {
887      .DepthBufferWriteEnable = true,
888   };
889
890   if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
891      ds.DepthTestEnable = true;
892      ds.DepthTestFunction = COMPAREFUNCTION_NEVER;
893   }
894
895   uint32_t offset;
896   void *state = blorp_alloc_dynamic_state(&brw->blorp,
897                                           AUB_TRACE_DEPTH_STENCIL_STATE,
898                                           GENX(DEPTH_STENCIL_STATE_length) * 4,
899                                           64, &offset);
900   GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds);
901
902#if GEN_GEN >= 7
903   blorp_emit(brw, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) {
904      sp.PointertoDEPTH_STENCIL_STATE = offset;
905   }
906#endif
907
908   return offset;
909
910#endif /* GEN_GEN */
911}
912
/* Layout facts about RENDER_SURFACE_STATE needed to allocate it in the batch
 * and to emit relocations for the addresses stored inside it.
 */
struct surface_state_info {
   unsigned num_dwords;   /* Length of the surface state in dwords */
   unsigned ss_align;     /* Required alignment of RENDER_SURFACE_STATE in bytes */
   unsigned reloc_dw;     /* Dword index that holds the surface address */
   unsigned aux_reloc_dw; /* Dword index that holds the aux surface address */
};

/* Indexed by hardware generation; slots without an entry are all zero. */
static const struct surface_state_info surface_state_infos[] = {
   [6] = {
      .num_dwords = 6,
      .ss_align = 32,
      .reloc_dw = 1,
      .aux_reloc_dw = 0,
   },
   [7] = {
      .num_dwords = 8,
      .ss_align = 32,
      .reloc_dw = 1,
      .aux_reloc_dw = 6,
   },
   [8] = {
      .num_dwords = 13,
      .ss_align = 64,
      .reloc_dw = 8,
      .aux_reloc_dw = 10,
   },
   [9] = {
      .num_dwords = 16,
      .ss_align = 64,
      .reloc_dw = 8,
      .aux_reloc_dw = 10,
   },
};
926
927static uint32_t
928blorp_emit_surface_state(struct brw_context *brw,
929                         const struct brw_blorp_surface_info *surface,
930                         bool is_render_target)
931{
932   const struct surface_state_info ss_info = surface_state_infos[brw->gen];
933
934   struct isl_surf surf = surface->surf;
935
936   if (surf.dim == ISL_SURF_DIM_1D &&
937       surf.dim_layout == ISL_DIM_LAYOUT_GEN4_2D) {
938      assert(surf.logical_level0_px.height == 1);
939      surf.dim = ISL_SURF_DIM_2D;
940   }
941
942   /* Blorp doesn't support HiZ in any of the blit or slow-clear paths */
943   enum isl_aux_usage aux_usage = surface->aux_usage;
944   if (aux_usage == ISL_AUX_USAGE_HIZ)
945      aux_usage = ISL_AUX_USAGE_NONE;
946
947   uint32_t surf_offset;
948   uint32_t *dw = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
949                                  ss_info.num_dwords * 4, ss_info.ss_align,
950                                  &surf_offset);
951
952   const uint32_t mocs =
953      is_render_target ? brw->blorp.mocs.rb : brw->blorp.mocs.tex;
954   uint64_t aux_bo_offset =
955      surface->aux_addr.buffer ? surface->aux_addr.buffer->offset64 : 0;
956
957   isl_surf_fill_state(&brw->isl_dev, dw, .surf = &surf, .view = &surface->view,
958                       .address = surface->addr.buffer->offset64 + surface->addr.offset,
959                       .aux_surf = &surface->aux_surf, .aux_usage = aux_usage,
960                       .aux_address = aux_bo_offset + surface->aux_addr.offset,
961                       .mocs = mocs, .clear_color = surface->clear_color,
962                       .x_offset_sa = surface->tile_x_sa,
963                       .y_offset_sa = surface->tile_y_sa);
964
965   /* Emit relocation to surface contents */
966   drm_intel_bo_emit_reloc(brw->batch.bo,
967                           surf_offset + ss_info.reloc_dw * 4,
968                           surface->addr.buffer,
969                           dw[ss_info.reloc_dw] - surface->addr.buffer->offset64,
970                           surface->addr.read_domains,
971                           surface->addr.write_domain);
972
973   if (aux_usage != ISL_AUX_USAGE_NONE) {
974      /* On gen7 and prior, the bottom 12 bits of the MCS base address are
975       * used to store other information.  This should be ok, however, because
976       * surface buffer addresses are always 4K page alinged.
977       */
978      assert((surface->aux_addr.offset & 0xfff) == 0);
979      drm_intel_bo_emit_reloc(brw->batch.bo,
980                              surf_offset + ss_info.aux_reloc_dw * 4,
981                              surface->aux_addr.buffer,
982                              dw[ss_info.aux_reloc_dw] & 0xfff,
983                              surface->aux_addr.read_domains,
984                              surface->aux_addr.write_domain);
985   }
986
987   return surf_offset;
988}
989
990static void
991blorp_emit_surface_states(struct brw_context *brw,
992                          const struct brw_blorp_params *params)
993{
994   uint32_t bind_offset;
995   uint32_t *bind =
996      brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
997                      sizeof(uint32_t) * BRW_BLORP_NUM_BINDING_TABLE_ENTRIES,
998                      32, /* alignment */ &bind_offset);
999
1000   bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] =
1001      blorp_emit_surface_state(brw, &params->dst, true);
1002   if (params->src.addr.buffer) {
1003      bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] =
1004         blorp_emit_surface_state(brw, &params->src, false);
1005   }
1006
1007#if GEN_GEN >= 7
1008   blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) {
1009      bt.PointertoPSBindingTable = bind_offset;
1010   }
1011#else
1012   blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {
1013      bt.PSBindingTableChange = true;
1014      bt.PointertoPSBindingTable = bind_offset;
1015   }
1016#endif
1017}
1018
1019static void
1020blorp_emit_sampler_state(struct brw_context *brw,
1021                         const struct brw_blorp_params *params)
1022{
1023   struct GENX(SAMPLER_STATE) sampler = {
1024      .MipModeFilter = MIPFILTER_NONE,
1025      .MagModeFilter = MAPFILTER_LINEAR,
1026      .MinModeFilter = MAPFILTER_LINEAR,
1027      .MinLOD = 0,
1028      .MaxLOD = 0,
1029      .TCXAddressControlMode = TCM_CLAMP,
1030      .TCYAddressControlMode = TCM_CLAMP,
1031      .TCZAddressControlMode = TCM_CLAMP,
1032      .MaximumAnisotropy = RATIO21,
1033      .RAddressMinFilterRoundingEnable = true,
1034      .RAddressMagFilterRoundingEnable = true,
1035      .VAddressMinFilterRoundingEnable = true,
1036      .VAddressMagFilterRoundingEnable = true,
1037      .UAddressMinFilterRoundingEnable = true,
1038      .UAddressMagFilterRoundingEnable = true,
1039      .NonnormalizedCoordinateEnable = true,
1040   };
1041
1042   uint32_t offset;
1043   void *state = blorp_alloc_dynamic_state(&brw->blorp,
1044                                           AUB_TRACE_SAMPLER_STATE,
1045                                           GENX(SAMPLER_STATE_length) * 4,
1046                                           32, &offset);
1047   GENX(SAMPLER_STATE_pack)(NULL, state, &sampler);
1048
1049#if GEN_GEN >= 7
1050   blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) {
1051      ssp.PointertoPSSamplerState = offset;
1052   }
1053#else
1054   blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) {
1055      ssp.VSSamplerStateChange = true;
1056      ssp.GSSamplerStateChange = true;
1057      ssp.PSSamplerStateChange = true;
1058      ssp.PointertoPSSamplerState = offset;
1059   }
1060#endif
1061}
1062
1063/* 3DSTATE_VIEWPORT_STATE_POINTERS */
1064static void
1065blorp_emit_viewport_state(struct brw_context *brw,
1066                          const struct brw_blorp_params *params)
1067{
1068   uint32_t cc_vp_offset;
1069
1070   void *state = blorp_alloc_dynamic_state(&brw->blorp,
1071                                           AUB_TRACE_CC_VP_STATE,
1072                                           GENX(CC_VIEWPORT_length) * 4, 32,
1073                                           &cc_vp_offset);
1074
1075   GENX(CC_VIEWPORT_pack)(brw, state,
1076      &(struct GENX(CC_VIEWPORT)) {
1077         .MinimumDepth = 0.0,
1078         .MaximumDepth = 1.0,
1079      });
1080
1081#if GEN_GEN >= 7
1082   blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) {
1083      vsp.CCViewportPointer = cc_vp_offset;
1084   }
1085#else
1086   blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) {
1087      vsp.CCViewportStateChange = true;
1088      vsp.PointertoCC_VIEWPORT = cc_vp_offset;
1089   }
1090#endif
1091}
1092
1093
1094/**
1095 * \brief Execute a blit or render pass operation.
1096 *
1097 * To execute the operation, this function manually constructs and emits a
1098 * batch to draw a rectangle primitive. The batchbuffer is flushed before
1099 * constructing and after emitting the batch.
1100 *
1101 * This function alters no GL state.
1102 */
1103void
1104genX(blorp_exec)(struct brw_context *brw,
1105                 const struct brw_blorp_params *params)
1106{
1107   uint32_t blend_state_offset = 0;
1108   uint32_t color_calc_state_offset = 0;
1109   uint32_t depth_stencil_state_offset;
1110
1111#if GEN_GEN == 6
1112   /* Emit workaround flushes when we switch from drawing to blorping. */
1113   brw_emit_post_sync_nonzero_flush(brw);
1114#endif
1115
1116   brw_upload_state_base_address(brw);
1117
1118#if GEN_GEN >= 8
1119   gen7_l3_state.emit(brw);
1120#endif
1121
1122   blorp_emit_vertex_buffers(brw, params);
1123   blorp_emit_vertex_elements(brw, params);
1124
1125   emit_urb_config(brw, params);
1126
1127   if (params->wm_prog_data) {
1128      blend_state_offset = blorp_emit_blend_state(brw, params);
1129      color_calc_state_offset = blorp_emit_color_calc_state(brw, params);
1130   }
1131   depth_stencil_state_offset = blorp_emit_depth_stencil_state(brw, params);
1132
1133#if GEN_GEN <= 6
1134   /* 3DSTATE_CC_STATE_POINTERS
1135    *
1136    * The pointer offsets are relative to
1137    * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
1138    *
1139    * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
1140    *
1141    * The dynamic state emit helpers emit their own STATE_POINTERS packets on
1142    * gen7+.  However, on gen6 and earlier, they're all lumpped together in
1143    * one CC_STATE_POINTERS packet so we have to emit that here.
1144    */
1145   blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), cc) {
1146      cc.BLEND_STATEChange = true;
1147      cc.COLOR_CALC_STATEChange = true;
1148      cc.DEPTH_STENCIL_STATEChange = true;
1149      cc.PointertoBLEND_STATE = blend_state_offset;
1150      cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset;
1151      cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset;
1152   }
1153#else
1154   (void)blend_state_offset;
1155   (void)color_calc_state_offset;
1156   (void)depth_stencil_state_offset;
1157#endif
1158
1159   blorp_emit(brw, GENX(3DSTATE_CONSTANT_VS), vs);
1160#if GEN_GEN >= 7
1161   blorp_emit(brw, GENX(3DSTATE_CONSTANT_HS), hs);
1162   blorp_emit(brw, GENX(3DSTATE_CONSTANT_DS), DS);
1163#endif
1164   blorp_emit(brw, GENX(3DSTATE_CONSTANT_GS), gs);
1165   blorp_emit(brw, GENX(3DSTATE_CONSTANT_PS), ps);
1166
1167   if (brw->use_resource_streamer)
1168      gen7_disable_hw_binding_tables(brw);
1169
1170   if (params->wm_prog_data)
1171      blorp_emit_surface_states(brw, params);
1172
1173   if (params->src.addr.buffer)
1174      blorp_emit_sampler_state(brw, params);
1175
1176   blorp_emit_3dstate_multisample(brw, params->dst.surf.samples);
1177
1178   blorp_emit(brw, GENX(3DSTATE_SAMPLE_MASK), mask) {
1179      mask.SampleMask = (1 << params->dst.surf.samples) - 1;
1180   }
1181
1182   /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
1183    * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
1184    *
1185    *   [DevSNB] A pipeline flush must be programmed prior to a
1186    *   3DSTATE_VS command that causes the VS Function Enable to
1187    *   toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
1188    *   command with CS stall bit set and a post sync operation.
1189    *
1190    * We've already done one at the start of the BLORP operation.
1191    */
1192   blorp_emit(brw, GENX(3DSTATE_VS), vs);
1193#if GEN_GEN >= 7
1194   blorp_emit(brw, GENX(3DSTATE_HS), hs);
1195   blorp_emit(brw, GENX(3DSTATE_TE), te);
1196   blorp_emit(brw, GENX(3DSTATE_DS), DS);
1197   blorp_emit(brw, GENX(3DSTATE_STREAMOUT), so);
1198#endif
1199   blorp_emit(brw, GENX(3DSTATE_GS), gs);
1200
1201   blorp_emit(brw, GENX(3DSTATE_CLIP), clip) {
1202      clip.PerspectiveDivideDisable = true;
1203   }
1204
1205   blorp_emit_sf_config(brw, params);
1206   blorp_emit_ps_config(brw, params);
1207
1208   blorp_emit_viewport_state(brw, params);
1209
1210   if (params->depth.addr.buffer) {
1211      blorp_emit_depth_stencil_config(brw, params);
1212   } else {
1213      brw_emit_depth_stall_flushes(brw);
1214
1215      blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) {
1216         db.SurfaceType = SURFTYPE_NULL;
1217         db.SurfaceFormat = D32_FLOAT;
1218      }
1219      blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz);
1220      blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb);
1221   }
1222
1223   /* 3DSTATE_CLEAR_PARAMS
1224    *
1225    * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
1226    *   [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
1227    *   packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
1228    */
1229   blorp_emit(brw, GENX(3DSTATE_CLEAR_PARAMS), clear) {
1230      clear.DepthClearValueValid = true;
1231      clear.DepthClearValue = params->depth.clear_color.u32[0];
1232   }
1233
1234   blorp_emit(brw, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
1235      rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
1236      rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
1237   }
1238
1239   blorp_emit(brw, GENX(3DPRIMITIVE), prim) {
1240      prim.VertexAccessType = SEQUENTIAL;
1241      prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
1242      prim.VertexCountPerInstance = 3;
1243      prim.InstanceCount = params->num_layers;
1244   }
1245}
1246