1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keithw@vmware.com>
30  */
31
32
33
34#include "intel_batchbuffer.h"
35#include "intel_fbo.h"
36#include "intel_mipmap_tree.h"
37
38#include "brw_context.h"
39#include "brw_state.h"
40#include "brw_defines.h"
41
42#include "main/framebuffer.h"
43#include "main/fbobject.h"
44#include "main/glformats.h"
45
46/* Constant single cliprect for framebuffer object or DRI2 drawing */
47static void
48upload_drawing_rect(struct brw_context *brw)
49{
50   struct gl_context *ctx = &brw->ctx;
51   const struct gl_framebuffer *fb = ctx->DrawBuffer;
52   const unsigned int fb_width = _mesa_geometric_width(fb);
53   const unsigned int fb_height = _mesa_geometric_height(fb);
54
55   BEGIN_BATCH(4);
56   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
57   OUT_BATCH(0); /* xmin, ymin */
58   OUT_BATCH(((fb_width - 1) & 0xffff) | ((fb_height - 1) << 16));
59   OUT_BATCH(0);
60   ADVANCE_BATCH();
61}
62
63const struct brw_tracked_state brw_drawing_rect = {
64   .dirty = {
65      .mesa = _NEW_BUFFERS,
66      .brw = BRW_NEW_BLORP |
67             BRW_NEW_CONTEXT,
68   },
69   .emit = upload_drawing_rect
70};
71
72/**
73 * Upload pointers to the per-stage state.
74 *
75 * The state pointers in this packet are all relative to the general state
76 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
77 */
78static void
79upload_pipelined_state_pointers(struct brw_context *brw)
80{
81   if (brw->gen == 5) {
82      /* Need to flush before changing clip max threads for errata. */
83      BEGIN_BATCH(1);
84      OUT_BATCH(MI_FLUSH);
85      ADVANCE_BATCH();
86   }
87
88   BEGIN_BATCH(7);
89   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
90   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
91	     brw->vs.base.state_offset);
92   if (brw->ff_gs.prog_active)
93      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
94		brw->ff_gs.state_offset | 1);
95   else
96      OUT_BATCH(0);
97   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
98	     brw->clip.state_offset | 1);
99   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
100	     brw->sf.state_offset);
101   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
102	     brw->wm.base.state_offset);
103   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
104	     brw->cc.state_offset);
105   ADVANCE_BATCH();
106
107   brw->ctx.NewDriverState |= BRW_NEW_PSP;
108}
109
/**
 * Emit, in this fixed order, the pipelined state pointers, the URB fence
 * and the CS URB state.
 */
static void
upload_psp_urb_cbs(struct brw_context *brw)
{
   /* 1. pipelined state pointers */
   upload_pipelined_state_pointers(brw);

   /* 2. URB fence */
   brw_upload_urb_fence(brw);

   /* 3. CS URB state */
   brw_upload_cs_urb_state(brw);
}
117
118const struct brw_tracked_state brw_psp_urb_cbs = {
119   .dirty = {
120      .mesa = 0,
121      .brw = BRW_NEW_BATCH |
122             BRW_NEW_BLORP |
123             BRW_NEW_FF_GS_PROG_DATA |
124             BRW_NEW_GEN4_UNIT_STATE |
125             BRW_NEW_STATE_BASE_ADDRESS |
126             BRW_NEW_URB_FENCE,
127   },
128   .emit = upload_psp_urb_cbs,
129};
130
131uint32_t
132brw_depthbuffer_format(struct brw_context *brw)
133{
134   struct gl_context *ctx = &brw->ctx;
135   struct gl_framebuffer *fb = ctx->DrawBuffer;
136   struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
137   struct intel_renderbuffer *srb;
138
139   if (!drb &&
140       (srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) &&
141       !srb->mt->stencil_mt &&
142       (intel_rb_format(srb) == MESA_FORMAT_Z24_UNORM_S8_UINT ||
143	intel_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_S8X24_UINT)) {
144      drb = srb;
145   }
146
147   if (!drb)
148      return BRW_DEPTHFORMAT_D32_FLOAT;
149
150   return brw_depth_format(brw, drb->mt->format);
151}
152
153/**
154 * Returns the mask of how many bits of x and y must be handled through the
155 * depthbuffer's draw offset x and y fields.
156 *
157 * The draw offset x/y field of the depthbuffer packet is unfortunately shared
158 * between the depth, hiz, and stencil buffers.  Because it can be hard to get
159 * all 3 to agree on this value, we want to do as much drawing offset
160 * adjustment as possible by moving the base offset of the 3 buffers, which is
161 * restricted to tile boundaries.
162 *
163 * For each buffer, the remainder must be applied through the x/y draw offset.
164 * This returns the worst-case mask of the low bits that have to go into the
165 * packet.  If the 3 buffers don't agree on the drawing offset ANDed with this
166 * mask, then we're in trouble.
167 */
168void
169brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
170                                uint32_t depth_level,
171                                uint32_t depth_layer,
172                                struct intel_mipmap_tree *stencil_mt,
173                                uint32_t *out_tile_mask_x,
174                                uint32_t *out_tile_mask_y)
175{
176   uint32_t tile_mask_x = 0, tile_mask_y = 0;
177
178   if (depth_mt) {
179      intel_get_tile_masks(depth_mt->tiling, depth_mt->tr_mode,
180                           depth_mt->cpp,
181                           &tile_mask_x, &tile_mask_y);
182
183      if (intel_miptree_level_has_hiz(depth_mt, depth_level)) {
184         uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
185         intel_get_tile_masks(depth_mt->hiz_buf->mt->tiling,
186                              depth_mt->hiz_buf->mt->tr_mode,
187                              depth_mt->hiz_buf->mt->cpp,
188                              &hiz_tile_mask_x,
189                              &hiz_tile_mask_y);
190
191         /* Each HiZ row represents 2 rows of pixels */
192         hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
193
194         tile_mask_x |= hiz_tile_mask_x;
195         tile_mask_y |= hiz_tile_mask_y;
196      }
197   }
198
199   if (stencil_mt) {
200      if (stencil_mt->stencil_mt)
201	 stencil_mt = stencil_mt->stencil_mt;
202
203      if (stencil_mt->format == MESA_FORMAT_S_UINT8) {
204         /* Separate stencil buffer uses 64x64 tiles. */
205         tile_mask_x |= 63;
206         tile_mask_y |= 63;
207      } else {
208         uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
209         intel_get_tile_masks(stencil_mt->tiling,
210                              stencil_mt->tr_mode,
211                              stencil_mt->cpp,
212                              &stencil_tile_mask_x,
213                              &stencil_tile_mask_y);
214
215         tile_mask_x |= stencil_tile_mask_x;
216         tile_mask_y |= stencil_tile_mask_y;
217      }
218   }
219
220   *out_tile_mask_x = tile_mask_x;
221   *out_tile_mask_y = tile_mask_y;
222}
223
224static struct intel_mipmap_tree *
225get_stencil_miptree(struct intel_renderbuffer *irb)
226{
227   if (!irb)
228      return NULL;
229   if (irb->mt->stencil_mt)
230      return irb->mt->stencil_mt;
231   return irb->mt;
232}
233
234void
235brw_workaround_depthstencil_alignment(struct brw_context *brw,
236                                      GLbitfield clear_mask)
237{
238   struct gl_context *ctx = &brw->ctx;
239   struct gl_framebuffer *fb = ctx->DrawBuffer;
240   bool rebase_depth = false;
241   bool rebase_stencil = false;
242   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
243   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
244   struct intel_mipmap_tree *depth_mt = NULL;
245   struct intel_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);
246   uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0;
247   uint32_t stencil_draw_x = 0, stencil_draw_y = 0;
248   bool invalidate_depth = clear_mask & BUFFER_BIT_DEPTH;
249   bool invalidate_stencil = clear_mask & BUFFER_BIT_STENCIL;
250
251   if (depth_irb)
252      depth_mt = depth_irb->mt;
253
254   /* Initialize brw->depthstencil to 'nop' workaround state.
255    */
256   brw->depthstencil.tile_x = 0;
257   brw->depthstencil.tile_y = 0;
258   brw->depthstencil.depth_offset = 0;
259   brw->depthstencil.stencil_offset = 0;
260   brw->depthstencil.hiz_offset = 0;
261   brw->depthstencil.depth_mt = NULL;
262   brw->depthstencil.stencil_mt = NULL;
263   if (depth_irb)
264      brw->depthstencil.depth_mt = depth_mt;
265   if (stencil_irb)
266      brw->depthstencil.stencil_mt = get_stencil_miptree(stencil_irb);
267
268   /* Gen6+ doesn't require the workarounds, since we always program the
269    * surface state at the start of the whole surface.
270    */
271   if (brw->gen >= 6)
272      return;
273
274   /* Check if depth buffer is in depth/stencil format.  If so, then it's only
275    * safe to invalidate it if we're also clearing stencil, and both depth_irb
276    * and stencil_irb point to the same miptree.
277    *
278    * Note: it's not sufficient to check for the case where
279    * _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL,
280    * because this fails to catch depth/stencil buffers on hardware that uses
281    * separate stencil.  To catch that case, we check whether
282    * depth_mt->stencil_mt is non-NULL.
283    */
284   if (depth_irb && invalidate_depth &&
285       (_mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL ||
286        depth_mt->stencil_mt)) {
287      invalidate_depth = invalidate_stencil && depth_irb && stencil_irb
288         && depth_irb->mt == stencil_irb->mt;
289   }
290
291   uint32_t tile_mask_x, tile_mask_y;
292   brw_get_depthstencil_tile_masks(depth_mt,
293                                   depth_mt ? depth_irb->mt_level : 0,
294                                   depth_mt ? depth_irb->mt_layer : 0,
295                                   stencil_mt,
296                                   &tile_mask_x, &tile_mask_y);
297
298   if (depth_irb) {
299      tile_x = depth_irb->draw_x & tile_mask_x;
300      tile_y = depth_irb->draw_y & tile_mask_y;
301
302      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
303       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
304       * Coordinate Offset X/Y":
305       *
306       *   "The 3 LSBs of both offsets must be zero to ensure correct
307       *   alignment"
308       */
309      if (tile_x & 7 || tile_y & 7)
310         rebase_depth = true;
311
312      /* We didn't even have intra-tile offsets before g45. */
313      if (!brw->has_surface_tile_offset) {
314         if (tile_x || tile_y)
315            rebase_depth = true;
316      }
317
318      if (rebase_depth) {
319         perf_debug("HW workaround: blitting depth level %d to a temporary "
320                    "to fix alignment (depth tile offset %d,%d)\n",
321                    depth_irb->mt_level, tile_x, tile_y);
322         intel_renderbuffer_move_to_temp(brw, depth_irb, invalidate_depth);
323         /* In the case of stencil_irb being the same packed depth/stencil
324          * texture but not the same rb, make it point at our rebased mt, too.
325          */
326         if (stencil_irb &&
327             stencil_irb != depth_irb &&
328             stencil_irb->mt == depth_mt) {
329            intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
330            intel_renderbuffer_set_draw_offset(stencil_irb);
331         }
332
333         stencil_mt = get_stencil_miptree(stencil_irb);
334
335         tile_x = depth_irb->draw_x & tile_mask_x;
336         tile_y = depth_irb->draw_y & tile_mask_y;
337      }
338
339      if (stencil_irb) {
340         stencil_mt = get_stencil_miptree(stencil_irb);
341         intel_miptree_get_image_offset(stencil_mt,
342                                        stencil_irb->mt_level,
343                                        stencil_irb->mt_layer,
344                                        &stencil_draw_x, &stencil_draw_y);
345         int stencil_tile_x = stencil_draw_x & tile_mask_x;
346         int stencil_tile_y = stencil_draw_y & tile_mask_y;
347
348         /* If stencil doesn't match depth, then we'll need to rebase stencil
349          * as well.  (if we hadn't decided to rebase stencil before, the
350          * post-stencil depth test will also rebase depth to try to match it
351          * up).
352          */
353         if (tile_x != stencil_tile_x ||
354             tile_y != stencil_tile_y) {
355            rebase_stencil = true;
356         }
357      }
358   }
359
360   /* If we have (just) stencil, check it for ignored low bits as well */
361   if (stencil_irb) {
362      intel_miptree_get_image_offset(stencil_mt,
363                                     stencil_irb->mt_level,
364                                     stencil_irb->mt_layer,
365                                     &stencil_draw_x, &stencil_draw_y);
366      stencil_tile_x = stencil_draw_x & tile_mask_x;
367      stencil_tile_y = stencil_draw_y & tile_mask_y;
368
369      if (stencil_tile_x & 7 || stencil_tile_y & 7)
370         rebase_stencil = true;
371
372      if (!brw->has_surface_tile_offset) {
373         if (stencil_tile_x || stencil_tile_y)
374            rebase_stencil = true;
375      }
376   }
377
378   if (rebase_stencil) {
379      perf_debug("HW workaround: blitting stencil level %d to a temporary "
380                 "to fix alignment (stencil tile offset %d,%d)\n",
381                 stencil_irb->mt_level, stencil_tile_x, stencil_tile_y);
382
383      intel_renderbuffer_move_to_temp(brw, stencil_irb, invalidate_stencil);
384      stencil_mt = get_stencil_miptree(stencil_irb);
385
386      intel_miptree_get_image_offset(stencil_mt,
387                                     stencil_irb->mt_level,
388                                     stencil_irb->mt_layer,
389                                     &stencil_draw_x, &stencil_draw_y);
390      stencil_tile_x = stencil_draw_x & tile_mask_x;
391      stencil_tile_y = stencil_draw_y & tile_mask_y;
392
393      if (depth_irb && depth_irb->mt == stencil_irb->mt) {
394         intel_miptree_reference(&depth_irb->mt, stencil_irb->mt);
395         intel_renderbuffer_set_draw_offset(depth_irb);
396      } else if (depth_irb && !rebase_depth) {
397         if (tile_x != stencil_tile_x ||
398             tile_y != stencil_tile_y) {
399            perf_debug("HW workaround: blitting depth level %d to a temporary "
400                       "to match stencil level %d alignment (depth tile offset "
401                       "%d,%d, stencil offset %d,%d)\n",
402                       depth_irb->mt_level,
403                       stencil_irb->mt_level,
404                       tile_x, tile_y,
405                       stencil_tile_x, stencil_tile_y);
406
407            intel_renderbuffer_move_to_temp(brw, depth_irb, invalidate_depth);
408
409            tile_x = depth_irb->draw_x & tile_mask_x;
410            tile_y = depth_irb->draw_y & tile_mask_y;
411
412            if (stencil_irb && stencil_irb->mt == depth_mt) {
413               intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
414               intel_renderbuffer_set_draw_offset(stencil_irb);
415            }
416
417            WARN_ONCE(stencil_tile_x != tile_x ||
418                      stencil_tile_y != tile_y,
419                      "Rebased stencil tile offset (%d,%d) doesn't match depth "
420                      "tile offset (%d,%d).\n",
421                      stencil_tile_x, stencil_tile_y,
422                      tile_x, tile_y);
423         }
424      }
425   }
426
427   if (!depth_irb) {
428      tile_x = stencil_tile_x;
429      tile_y = stencil_tile_y;
430   }
431
432   /* While we just tried to get everything aligned, we may have failed to do
433    * so in the case of rendering to array or 3D textures, where nonzero faces
434    * will still have an offset post-rebase.  At least give an informative
435    * warning.
436    */
437   WARN_ONCE((tile_x & 7) || (tile_y & 7),
438             "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
439             "Truncating offset, bad rendering may occur.\n");
440   tile_x &= ~7;
441   tile_y &= ~7;
442
443   /* Now, after rebasing, save off the new dephtstencil state so the hardware
444    * packets can just dereference that without re-calculating tile offsets.
445    */
446   brw->depthstencil.tile_x = tile_x;
447   brw->depthstencil.tile_y = tile_y;
448   if (depth_irb) {
449      depth_mt = depth_irb->mt;
450      brw->depthstencil.depth_mt = depth_mt;
451      brw->depthstencil.depth_offset =
452         intel_miptree_get_aligned_offset(depth_mt,
453                                          depth_irb->draw_x & ~tile_mask_x,
454                                          depth_irb->draw_y & ~tile_mask_y);
455      if (intel_renderbuffer_has_hiz(depth_irb)) {
456         brw->depthstencil.hiz_offset =
457            intel_miptree_get_aligned_offset(depth_mt,
458                                             depth_irb->draw_x & ~tile_mask_x,
459                                             (depth_irb->draw_y & ~tile_mask_y) / 2);
460      }
461   }
462   if (stencil_irb) {
463      stencil_mt = get_stencil_miptree(stencil_irb);
464
465      brw->depthstencil.stencil_mt = stencil_mt;
466      if (stencil_mt->format == MESA_FORMAT_S_UINT8) {
467         /* Note: we can't compute the stencil offset using
468          * intel_region_get_aligned_offset(), because stencil_region claims
469          * that the region is untiled even though it's W tiled.
470          */
471         brw->depthstencil.stencil_offset =
472            (stencil_draw_y & ~tile_mask_y) * stencil_mt->pitch +
473            (stencil_draw_x & ~tile_mask_x) * 64;
474      }
475   }
476}
477
478void
479brw_emit_depthbuffer(struct brw_context *brw)
480{
481   struct gl_context *ctx = &brw->ctx;
482   struct gl_framebuffer *fb = ctx->DrawBuffer;
483   /* _NEW_BUFFERS */
484   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
485   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
486   struct intel_mipmap_tree *depth_mt = brw->depthstencil.depth_mt;
487   struct intel_mipmap_tree *stencil_mt = brw->depthstencil.stencil_mt;
488   uint32_t tile_x = brw->depthstencil.tile_x;
489   uint32_t tile_y = brw->depthstencil.tile_y;
490   bool hiz = depth_irb && intel_renderbuffer_has_hiz(depth_irb);
491   bool separate_stencil = false;
492   uint32_t depth_surface_type = BRW_SURFACE_NULL;
493   uint32_t depthbuffer_format = BRW_DEPTHFORMAT_D32_FLOAT;
494   uint32_t depth_offset = 0;
495   uint32_t width = 1, height = 1;
496
497   if (stencil_mt) {
498      separate_stencil = stencil_mt->format == MESA_FORMAT_S_UINT8;
499
500      /* Gen7 supports only separate stencil */
501      assert(separate_stencil || brw->gen < 7);
502   }
503
504   /* If there's a packed depth/stencil bound to stencil only, we need to
505    * emit the packed depth/stencil buffer packet.
506    */
507   if (!depth_irb && stencil_irb && !separate_stencil) {
508      depth_irb = stencil_irb;
509      depth_mt = stencil_mt;
510   }
511
512   if (depth_irb && depth_mt) {
513      /* When 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Enable is set, then
514       * 3DSTATE_DEPTH_BUFFER.Surface_Format is not permitted to be a packed
515       * depthstencil format.
516       *
517       * Gens prior to 7 require that HiZ_Enable and Separate_Stencil_Enable be
518       * set to the same value. Gens after 7 implicitly always set
519       * Separate_Stencil_Enable; software cannot disable it.
520       */
521      if ((brw->gen < 7 && hiz) || brw->gen >= 7) {
522         assert(!_mesa_is_format_packed_depth_stencil(depth_mt->format));
523      }
524
525      /* Prior to Gen7, if using separate stencil, hiz must be enabled. */
526      assert(brw->gen >= 7 || !separate_stencil || hiz);
527
528      assert(brw->gen < 6 || depth_mt->tiling == I915_TILING_Y);
529      assert(!hiz || depth_mt->tiling == I915_TILING_Y);
530
531      depthbuffer_format = brw_depthbuffer_format(brw);
532      depth_surface_type = BRW_SURFACE_2D;
533      depth_offset = brw->depthstencil.depth_offset;
534      width = depth_irb->Base.Base.Width;
535      height = depth_irb->Base.Base.Height;
536   } else if (separate_stencil) {
537      /*
538       * There exists a separate stencil buffer but no depth buffer.
539       *
540       * The stencil buffer inherits most of its fields from
541       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
542       * height.
543       *
544       * The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1,
545       * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
546       *     [DevGT+]: This field must be set to TRUE.
547       */
548      assert(brw->has_separate_stencil);
549
550      depth_surface_type = BRW_SURFACE_2D;
551      width = stencil_irb->Base.Base.Width;
552      height = stencil_irb->Base.Base.Height;
553   }
554
555   if (depth_mt)
556      brw_render_cache_set_check_flush(brw, depth_mt->bo);
557   if (stencil_mt)
558      brw_render_cache_set_check_flush(brw, stencil_mt->bo);
559
560   brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
561                                    depthbuffer_format, depth_surface_type,
562                                    stencil_mt, hiz, separate_stencil,
563                                    width, height, tile_x, tile_y);
564}
565
566void
567brw_emit_depth_stencil_hiz(struct brw_context *brw,
568                           struct intel_mipmap_tree *depth_mt,
569                           uint32_t depth_offset, uint32_t depthbuffer_format,
570                           uint32_t depth_surface_type,
571                           struct intel_mipmap_tree *stencil_mt,
572                           bool hiz, bool separate_stencil,
573                           uint32_t width, uint32_t height,
574                           uint32_t tile_x, uint32_t tile_y)
575{
576   /* Enable the hiz bit if we're doing separate stencil, because it and the
577    * separate stencil bit must have the same value. From Section 2.11.5.6.1.1
578    * 3DSTATE_DEPTH_BUFFER, Bit 1.21 "Separate Stencil Enable":
579    *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
580    *     Enable must also be enabled.
581    *
582    *     [DevGT]: This field must be set to the same value (enabled or
583    *     disabled) as Hierarchical Depth Buffer Enable
584    */
585   bool enable_hiz_ss = hiz || separate_stencil;
586
587
588   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
589    * non-pipelined state that will need the PIPE_CONTROL workaround.
590    */
591   if (brw->gen == 6) {
592      brw_emit_depth_stall_flushes(brw);
593   }
594
595   unsigned int len;
596   if (brw->gen >= 6)
597      len = 7;
598   else if (brw->is_g4x || brw->gen == 5)
599      len = 6;
600   else
601      len = 5;
602
603   BEGIN_BATCH(len);
604   OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
605   OUT_BATCH((depth_mt ? depth_mt->pitch - 1 : 0) |
606             (depthbuffer_format << 18) |
607             ((enable_hiz_ss ? 1 : 0) << 21) | /* separate stencil enable */
608             ((enable_hiz_ss ? 1 : 0) << 22) | /* hiz enable */
609             (BRW_TILEWALK_YMAJOR << 26) |
610             ((depth_mt ? depth_mt->tiling != I915_TILING_NONE : 1)
611              << 27) |
612             (depth_surface_type << 29));
613
614   if (depth_mt) {
615      OUT_RELOC(depth_mt->bo,
616		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
617		depth_offset);
618   } else {
619      OUT_BATCH(0);
620   }
621
622   OUT_BATCH(((width + tile_x - 1) << 6) |
623             ((height + tile_y - 1) << 19));
624   OUT_BATCH(0);
625
626   if (brw->is_g4x || brw->gen >= 5)
627      OUT_BATCH(tile_x | (tile_y << 16));
628   else
629      assert(tile_x == 0 && tile_y == 0);
630
631   if (brw->gen >= 6)
632      OUT_BATCH(0);
633
634   ADVANCE_BATCH();
635
636   if (hiz || separate_stencil) {
637      /*
638       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
639       * stencil enable' and 'hiz enable' bits were set. Therefore we must
640       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
641       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
642       * failure to do so causes hangs on gen5 and a stall on gen6.
643       */
644
645      /* Emit hiz buffer. */
646      if (hiz) {
647         assert(depth_mt);
648         struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_buf->mt;
649	 BEGIN_BATCH(3);
650	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
651	 OUT_BATCH(hiz_mt->pitch - 1);
652	 OUT_RELOC(hiz_mt->bo,
653		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
654		   brw->depthstencil.hiz_offset);
655	 ADVANCE_BATCH();
656      } else {
657	 BEGIN_BATCH(3);
658	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
659	 OUT_BATCH(0);
660	 OUT_BATCH(0);
661	 ADVANCE_BATCH();
662      }
663
664      /* Emit stencil buffer. */
665      if (separate_stencil) {
666	 BEGIN_BATCH(3);
667	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
668         /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
669          * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
670          *    The pitch must be set to 2x the value computed based on width, as
671          *    the stencil buffer is stored with two rows interleaved.
672          */
673	 OUT_BATCH(2 * stencil_mt->pitch - 1);
674	 OUT_RELOC(stencil_mt->bo,
675		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
676		   brw->depthstencil.stencil_offset);
677	 ADVANCE_BATCH();
678      } else {
679	 BEGIN_BATCH(3);
680	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
681	 OUT_BATCH(0);
682	 OUT_BATCH(0);
683	 ADVANCE_BATCH();
684      }
685   }
686
687   /*
688    * On Gen >= 6, emit clear params for safety. If using hiz, then clear
689    * params must be emitted.
690    *
691    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
692    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
693    *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
694    */
695   if (brw->gen >= 6 || hiz) {
696      BEGIN_BATCH(2);
697      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
698		GEN5_DEPTH_CLEAR_VALID |
699		(2 - 2));
700      OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0);
701      ADVANCE_BATCH();
702   }
703}
704
705const struct brw_tracked_state brw_depthbuffer = {
706   .dirty = {
707      .mesa = _NEW_BUFFERS,
708      .brw = BRW_NEW_BATCH |
709             BRW_NEW_BLORP,
710   },
711   .emit = brw_emit_depthbuffer,
712};
713
714/**
715 * Polygon stipple packet
716 */
717static void
718upload_polygon_stipple(struct brw_context *brw)
719{
720   struct gl_context *ctx = &brw->ctx;
721   GLuint i;
722
723   /* _NEW_POLYGON */
724   if (!ctx->Polygon.StippleFlag)
725      return;
726
727   BEGIN_BATCH(33);
728   OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));
729
730   /* Polygon stipple is provided in OpenGL order, i.e. bottom
731    * row first.  If we're rendering to a window (i.e. the
732    * default frame buffer object, 0), then we need to invert
733    * it to match our pixel layout.  But if we're rendering
734    * to a FBO (i.e. any named frame buffer object), we *don't*
735    * need to invert - we already match the layout.
736    */
737   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
738      for (i = 0; i < 32; i++)
739	  OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
740   } else {
741      for (i = 0; i < 32; i++)
742	 OUT_BATCH(ctx->PolygonStipple[i]);
743   }
744   ADVANCE_BATCH();
745}
746
747const struct brw_tracked_state brw_polygon_stipple = {
748   .dirty = {
749      .mesa = _NEW_POLYGON |
750              _NEW_POLYGONSTIPPLE,
751      .brw = BRW_NEW_CONTEXT,
752   },
753   .emit = upload_polygon_stipple
754};
755
756/**
757 * Polygon stipple offset packet
758 */
759static void
760upload_polygon_stipple_offset(struct brw_context *brw)
761{
762   struct gl_context *ctx = &brw->ctx;
763
764   /* _NEW_POLYGON */
765   if (!ctx->Polygon.StippleFlag)
766      return;
767
768   BEGIN_BATCH(2);
769   OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));
770
771   /* _NEW_BUFFERS
772    *
773    * If we're drawing to a system window we have to invert the Y axis
774    * in order to match the OpenGL pixel coordinate system, and our
775    * offset must be matched to the window position.  If we're drawing
776    * to a user-created FBO then our native pixel coordinate system
777    * works just fine, and there's no window system to worry about.
778    */
779   if (_mesa_is_winsys_fbo(ctx->DrawBuffer))
780      OUT_BATCH((32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31);
781   else
782      OUT_BATCH(0);
783   ADVANCE_BATCH();
784}
785
786const struct brw_tracked_state brw_polygon_stipple_offset = {
787   .dirty = {
788      .mesa = _NEW_BUFFERS |
789              _NEW_POLYGON,
790      .brw = BRW_NEW_CONTEXT,
791   },
792   .emit = upload_polygon_stipple_offset
793};
794
795/**
796 * Line stipple packet
797 */
798static void
799upload_line_stipple(struct brw_context *brw)
800{
801   struct gl_context *ctx = &brw->ctx;
802   GLfloat tmp;
803   GLint tmpi;
804
805   if (!ctx->Line.StippleFlag)
806      return;
807
808   BEGIN_BATCH(3);
809   OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
810   OUT_BATCH(ctx->Line.StipplePattern);
811
812   if (brw->gen >= 7) {
813      /* in U1.16 */
814      tmp = 1.0f / ctx->Line.StippleFactor;
815      tmpi = tmp * (1<<16);
816      OUT_BATCH(tmpi << 15 | ctx->Line.StippleFactor);
817   } else {
818      /* in U1.13 */
819      tmp = 1.0f / ctx->Line.StippleFactor;
820      tmpi = tmp * (1<<13);
821      OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
822   }
823
824   ADVANCE_BATCH();
825}
826
827const struct brw_tracked_state brw_line_stipple = {
828   .dirty = {
829      .mesa = _NEW_LINE,
830      .brw = BRW_NEW_CONTEXT,
831   },
832   .emit = upload_line_stipple
833};
834
835void
836brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
837{
838   const bool is_965 = brw->gen == 4 && !brw->is_g4x;
839   const uint32_t _3DSTATE_PIPELINE_SELECT =
840      is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45;
841
842   if (brw->use_resource_streamer && pipeline != BRW_RENDER_PIPELINE) {
843      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
844       * PIPELINE_SELECT [DevBWR+]":
845       *
846       *   Project: HSW, BDW, CHV, SKL, BXT
847       *
848       *   Hardware Binding Tables are only supported for 3D
849       *   workloads. Resource streamer must be enabled only for 3D
850       *   workloads. Resource streamer must be disabled for Media and GPGPU
851       *   workloads.
852       */
853      BEGIN_BATCH(1);
854      OUT_BATCH(MI_RS_CONTROL | 0);
855      ADVANCE_BATCH();
856
857      gen7_disable_hw_binding_tables(brw);
858
859      /* XXX - Disable gather constant pool too when we start using it. */
860   }
861
862   if (brw->gen >= 8 && brw->gen < 10) {
863      /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
864       *
865       *   Software must clear the COLOR_CALC_STATE Valid field in
866       *   3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
867       *   with Pipeline Select set to GPGPU.
868       *
869       * The internal hardware docs recommend the same workaround for Gen9
870       * hardware too.
871       */
872      if (pipeline == BRW_COMPUTE_PIPELINE) {
873         BEGIN_BATCH(2);
874         OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
875         OUT_BATCH(0);
876         ADVANCE_BATCH();
877
878         brw->ctx.NewDriverState |= BRW_NEW_CC_STATE;
879      }
880   }
881
882   if (brw->gen >= 6) {
883      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
884       * PIPELINE_SELECT [DevBWR+]":
885       *
886       *   Project: DEVSNB+
887       *
888       *   Software must ensure all the write caches are flushed through a
889       *   stalling PIPE_CONTROL command followed by another PIPE_CONTROL
890       *   command to invalidate read only caches prior to programming
891       *   MI_PIPELINE_SELECT command to change the Pipeline Select Mode.
892       */
893      const unsigned dc_flush =
894         brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
895
896      brw_emit_pipe_control_flush(brw,
897                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
898                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
899                                  dc_flush |
900                                  PIPE_CONTROL_NO_WRITE |
901                                  PIPE_CONTROL_CS_STALL);
902
903      brw_emit_pipe_control_flush(brw,
904                                  PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
905                                  PIPE_CONTROL_CONST_CACHE_INVALIDATE |
906                                  PIPE_CONTROL_STATE_CACHE_INVALIDATE |
907                                  PIPE_CONTROL_INSTRUCTION_INVALIDATE |
908                                  PIPE_CONTROL_NO_WRITE);
909
910   } else {
911      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
912       * PIPELINE_SELECT [DevBWR+]":
913       *
914       *   Project: PRE-DEVSNB
915       *
916       *   Software must ensure the current pipeline is flushed via an
917       *   MI_FLUSH or PIPE_CONTROL prior to the execution of PIPELINE_SELECT.
918       */
919      BEGIN_BATCH(1);
920      OUT_BATCH(MI_FLUSH);
921      ADVANCE_BATCH();
922   }
923
924   /* Select the pipeline */
925   BEGIN_BATCH(1);
926   OUT_BATCH(_3DSTATE_PIPELINE_SELECT << 16 |
927             (brw->gen >= 9 ? (3 << 8) : 0) |
928             (pipeline == BRW_COMPUTE_PIPELINE ? 2 : 0));
929   ADVANCE_BATCH();
930
931   if (brw->gen == 7 && !brw->is_haswell &&
932       pipeline == BRW_RENDER_PIPELINE) {
933      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
934       * PIPELINE_SELECT [DevBWR+]":
935       *
936       *   Project: DEVIVB, DEVHSW:GT3:A0
937       *
938       *   Software must send a pipe_control with a CS stall and a post sync
939       *   operation and then a dummy DRAW after every MI_SET_CONTEXT and
940       *   after any PIPELINE_SELECT that is enabling 3D mode.
941       */
942      gen7_emit_cs_stall_flush(brw);
943
944      BEGIN_BATCH(7);
945      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
946      OUT_BATCH(_3DPRIM_POINTLIST);
947      OUT_BATCH(0);
948      OUT_BATCH(0);
949      OUT_BATCH(0);
950      OUT_BATCH(0);
951      OUT_BATCH(0);
952      ADVANCE_BATCH();
953   }
954
955   if (brw->use_resource_streamer && pipeline == BRW_RENDER_PIPELINE) {
956      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
957       * PIPELINE_SELECT [DevBWR+]":
958       *
959       *   Project: HSW, BDW, CHV, SKL, BXT
960       *
961       *   Hardware Binding Tables are only supported for 3D
962       *   workloads. Resource streamer must be enabled only for 3D
963       *   workloads. Resource streamer must be disabled for Media and GPGPU
964       *   workloads.
965       */
966      BEGIN_BATCH(1);
967      OUT_BATCH(MI_RS_CONTROL | 1);
968      ADVANCE_BATCH();
969
970      gen7_enable_hw_binding_tables(brw);
971
972      /* XXX - Re-enable gather constant pool here. */
973   }
974}
975
976/**
977 * Misc invariant state packets
978 */
979void
980brw_upload_invariant_state(struct brw_context *brw)
981{
982   const bool is_965 = brw->gen == 4 && !brw->is_g4x;
983
984   brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE);
985   brw->last_pipeline = BRW_RENDER_PIPELINE;
986
987   if (brw->gen >= 8) {
988      BEGIN_BATCH(3);
989      OUT_BATCH(CMD_STATE_SIP << 16 | (3 - 2));
990      OUT_BATCH(0);
991      OUT_BATCH(0);
992      ADVANCE_BATCH();
993   } else {
994      BEGIN_BATCH(2);
995      OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
996      OUT_BATCH(0);
997      ADVANCE_BATCH();
998   }
999
1000   /* Original Gen4 doesn't have 3DSTATE_AA_LINE_PARAMETERS. */
1001   if (!is_965) {
1002      BEGIN_BATCH(3);
1003      OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
1004      /* use legacy aa line coverage computation */
1005      OUT_BATCH(0);
1006      OUT_BATCH(0);
1007      ADVANCE_BATCH();
1008   }
1009
1010   const uint32_t _3DSTATE_VF_STATISTICS =
1011      is_965 ? GEN4_3DSTATE_VF_STATISTICS : GM45_3DSTATE_VF_STATISTICS;
1012   BEGIN_BATCH(1);
1013   OUT_BATCH(_3DSTATE_VF_STATISTICS << 16 | 1);
1014   ADVANCE_BATCH();
1015}
1016
1017const struct brw_tracked_state brw_invariant_state = {
1018   .dirty = {
1019      .mesa = 0,
1020      .brw = BRW_NEW_BLORP |
1021             BRW_NEW_CONTEXT,
1022   },
1023   .emit = brw_upload_invariant_state
1024};
1025
1026/**
1027 * Define the base addresses which some state is referenced from.
1028 *
1029 * This allows us to avoid having to emit relocations for the objects,
1030 * and is actually required for binding table pointers on gen6.
1031 *
1032 * Surface state base address covers binding table pointers and
1033 * surface state objects, but not the surfaces that the surface state
1034 * objects point to.
1035 */
1036void
1037brw_upload_state_base_address(struct brw_context *brw)
1038{
1039   if (brw->batch.state_base_address_emitted)
1040      return;
1041
1042   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
1043    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
1044    * programmed prior to STATE_BASE_ADDRESS.
1045    *
1046    * However, given that the instruction SBA (general state base
1047    * address) on this chipset is always set to 0 across X and GL,
1048    * maybe this isn't required for us in particular.
1049    */
1050
1051   if (brw->gen >= 8) {
1052      uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
1053      int pkt_len = brw->gen >= 9 ? 19 : 16;
1054
1055      BEGIN_BATCH(pkt_len);
1056      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (pkt_len - 2));
1057      /* General state base address: stateless DP read/write requests */
1058      OUT_BATCH(mocs_wb << 4 | 1);
1059      OUT_BATCH(0);
1060      OUT_BATCH(mocs_wb << 16);
1061      /* Surface state base address: */
1062      OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
1063                  mocs_wb << 4 | 1);
1064      /* Dynamic state base address: */
1065      OUT_RELOC64(brw->batch.bo,
1066                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
1067                  mocs_wb << 4 | 1);
1068      /* Indirect object base address: MEDIA_OBJECT data */
1069      OUT_BATCH(mocs_wb << 4 | 1);
1070      OUT_BATCH(0);
1071      /* Instruction base address: shader kernels (incl. SIP) */
1072      OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
1073                  mocs_wb << 4 | 1);
1074
1075      /* General state buffer size */
1076      OUT_BATCH(0xfffff001);
1077      /* Dynamic state buffer size */
1078      OUT_BATCH(ALIGN(brw->batch.bo->size, 4096) | 1);
1079      /* Indirect object upper bound */
1080      OUT_BATCH(0xfffff001);
1081      /* Instruction access upper bound */
1082      OUT_BATCH(ALIGN(brw->cache.bo->size, 4096) | 1);
1083      if (brw->gen >= 9) {
1084         OUT_BATCH(1);
1085         OUT_BATCH(0);
1086         OUT_BATCH(0);
1087      }
1088      ADVANCE_BATCH();
1089   } else if (brw->gen >= 6) {
1090      uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0;
1091
1092       BEGIN_BATCH(10);
1093       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
1094       OUT_BATCH(mocs << 8 | /* General State Memory Object Control State */
1095                 mocs << 4 | /* Stateless Data Port Access Memory Object Control State */
1096                 1); /* General State Base Address Modify Enable */
1097       /* Surface state base address:
1098	* BINDING_TABLE_STATE
1099	* SURFACE_STATE
1100	*/
1101       OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
1102        /* Dynamic state base address:
1103	 * SAMPLER_STATE
1104	 * SAMPLER_BORDER_COLOR_STATE
1105	 * CLIP, SF, WM/CC viewport state
1106	 * COLOR_CALC_STATE
1107	 * DEPTH_STENCIL_STATE
1108	 * BLEND_STATE
1109	 * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
1110	 * Disable is clear, which we rely on)
1111	 */
1112       OUT_RELOC(brw->batch.bo, (I915_GEM_DOMAIN_RENDER |
1113				   I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
1114
1115       OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
1116       OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
1117		 1); /* Instruction base address: shader kernels (incl. SIP) */
1118
1119       OUT_BATCH(1); /* General state upper bound */
1120       /* Dynamic state upper bound.  Although the documentation says that
1121	* programming it to zero will cause it to be ignored, that is a lie.
1122	* If this isn't programmed to a real bound, the sampler border color
1123	* pointer is rejected, causing border color to mysteriously fail.
1124	*/
1125       OUT_BATCH(0xfffff001);
1126       OUT_BATCH(1); /* Indirect object upper bound */
1127       OUT_BATCH(1); /* Instruction access upper bound */
1128       ADVANCE_BATCH();
1129   } else if (brw->gen == 5) {
1130       BEGIN_BATCH(8);
1131       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
1132       OUT_BATCH(1); /* General state base address */
1133       OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
1134		 1); /* Surface state base address */
1135       OUT_BATCH(1); /* Indirect object base address */
1136       OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
1137		 1); /* Instruction base address */
1138       OUT_BATCH(0xfffff001); /* General state upper bound */
1139       OUT_BATCH(1); /* Indirect object upper bound */
1140       OUT_BATCH(1); /* Instruction access upper bound */
1141       ADVANCE_BATCH();
1142   } else {
1143       BEGIN_BATCH(6);
1144       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
1145       OUT_BATCH(1); /* General state base address */
1146       OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
1147		 1); /* Surface state base address */
1148       OUT_BATCH(1); /* Indirect object base address */
1149       OUT_BATCH(1); /* General state upper bound */
1150       OUT_BATCH(1); /* Indirect object upper bound */
1151       ADVANCE_BATCH();
1152   }
1153
1154   /* According to section 3.6.1 of VOL1 of the 965 PRM,
1155    * STATE_BASE_ADDRESS updates require a reissue of:
1156    *
1157    * 3DSTATE_PIPELINE_POINTERS
1158    * 3DSTATE_BINDING_TABLE_POINTERS
1159    * MEDIA_STATE_POINTERS
1160    *
1161    * and this continues through Ironlake.  The Sandy Bridge PRM, vol
1162    * 1 part 1 says that the folowing packets must be reissued:
1163    *
1164    * 3DSTATE_CC_POINTERS
1165    * 3DSTATE_BINDING_TABLE_POINTERS
1166    * 3DSTATE_SAMPLER_STATE_POINTERS
1167    * 3DSTATE_VIEWPORT_STATE_POINTERS
1168    * MEDIA_STATE_POINTERS
1169    *
1170    * Those are always reissued following SBA updates anyway (new
1171    * batch time), except in the case of the program cache BO
1172    * changing.  Having a separate state flag makes the sequence more
1173    * obvious.
1174    */
1175
1176   brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS;
1177   brw->batch.state_base_address_emitted = true;
1178}
1179