brw_misc_state.c revision fb5ff51f422e1718c09da01f3c5bb5baecc9d68e
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33
34#include "intel_batchbuffer.h"
35#include "intel_fbo.h"
36#include "intel_regions.h"
37
38#include "brw_context.h"
39#include "brw_state.h"
40#include "brw_defines.h"
41
42/* Constant single cliprect for framebuffer object or DRI2 drawing */
43static void upload_drawing_rect(struct brw_context *brw)
44{
45   struct intel_context *intel = &brw->intel;
46   struct gl_context *ctx = &intel->ctx;
47
48   BEGIN_BATCH(4);
49   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
50   OUT_BATCH(0); /* xmin, ymin */
51   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
52	    ((ctx->DrawBuffer->Height - 1) << 16));
53   OUT_BATCH(0);
54   ADVANCE_BATCH();
55}
56
57const struct brw_tracked_state brw_drawing_rect = {
58   .dirty = {
59      .mesa = _NEW_BUFFERS,
60      .brw = BRW_NEW_CONTEXT,
61      .cache = 0
62   },
63   .emit = upload_drawing_rect
64};
65
66/**
67 * Upload the binding table pointers, which point each stage's array of surface
68 * state pointers.
69 *
70 * The binding table pointers are relative to the surface state base address,
71 * which points at the batchbuffer containing the streamed batch state.
72 */
73static void upload_binding_table_pointers(struct brw_context *brw)
74{
75   struct intel_context *intel = &brw->intel;
76
77   BEGIN_BATCH(6);
78   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
79   OUT_BATCH(brw->vs.bind_bo_offset);
80   OUT_BATCH(0); /* gs */
81   OUT_BATCH(0); /* clip */
82   OUT_BATCH(0); /* sf */
83   OUT_BATCH(brw->wm.bind_bo_offset);
84   ADVANCE_BATCH();
85}
86
87const struct brw_tracked_state brw_binding_table_pointers = {
88   .dirty = {
89      .mesa = 0,
90      .brw = (BRW_NEW_BATCH |
91	      BRW_NEW_STATE_BASE_ADDRESS |
92	      BRW_NEW_VS_BINDING_TABLE |
93	      BRW_NEW_GS_BINDING_TABLE |
94	      BRW_NEW_PS_BINDING_TABLE),
95      .cache = 0,
96   },
97   .emit = upload_binding_table_pointers,
98};
99
100/**
101 * Upload the binding table pointers, which point each stage's array of surface
102 * state pointers.
103 *
104 * The binding table pointers are relative to the surface state base address,
105 * which points at the batchbuffer containing the streamed batch state.
106 */
107static void upload_gen6_binding_table_pointers(struct brw_context *brw)
108{
109   struct intel_context *intel = &brw->intel;
110
111   BEGIN_BATCH(4);
112   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
113	     GEN6_BINDING_TABLE_MODIFY_VS |
114	     GEN6_BINDING_TABLE_MODIFY_GS |
115	     GEN6_BINDING_TABLE_MODIFY_PS |
116	     (4 - 2));
117   OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
118   OUT_BATCH(0); /* gs */
119   OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */
120   ADVANCE_BATCH();
121}
122
123const struct brw_tracked_state gen6_binding_table_pointers = {
124   .dirty = {
125      .mesa = 0,
126      .brw = (BRW_NEW_BATCH |
127	      BRW_NEW_STATE_BASE_ADDRESS |
128	      BRW_NEW_VS_BINDING_TABLE |
129	      BRW_NEW_GS_BINDING_TABLE |
130	      BRW_NEW_PS_BINDING_TABLE),
131      .cache = 0,
132   },
133   .emit = upload_gen6_binding_table_pointers,
134};
135
136/**
137 * Upload pointers to the per-stage state.
138 *
139 * The state pointers in this packet are all relative to the general state
140 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
141 */
142static void upload_pipelined_state_pointers(struct brw_context *brw )
143{
144   struct intel_context *intel = &brw->intel;
145
146   if (intel->gen == 5) {
147      /* Need to flush before changing clip max threads for errata. */
148      BEGIN_BATCH(1);
149      OUT_BATCH(MI_FLUSH);
150      ADVANCE_BATCH();
151   }
152
153   BEGIN_BATCH(7);
154   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
155   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
156	     brw->vs.state_offset);
157   if (brw->gs.prog_active)
158      OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
159		brw->gs.state_offset | 1);
160   else
161      OUT_BATCH(0);
162   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
163	     brw->clip.state_offset | 1);
164   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
165	     brw->sf.state_offset);
166   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
167	     brw->wm.state_offset);
168   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
169	     brw->cc.state_offset);
170   ADVANCE_BATCH();
171
172   brw->state.dirty.brw |= BRW_NEW_PSP;
173}
174
/**
 * Emit the pipelined state pointers, then the URB fence, then the CS URB
 * state, in that order.
 */
static void upload_psp_urb_cbs(struct brw_context *brw)
{
   /* Pipelined state pointers come first ... */
   upload_pipelined_state_pointers(brw);

   /* ... followed by the URB fence and the CS URB state. */
   brw_upload_urb_fence(brw);
   brw_upload_cs_urb_state(brw);
}
181
182const struct brw_tracked_state brw_psp_urb_cbs = {
183   .dirty = {
184      .mesa = 0,
185      .brw = (BRW_NEW_URB_FENCE |
186	      BRW_NEW_BATCH |
187	      BRW_NEW_STATE_BASE_ADDRESS),
188      .cache = (CACHE_NEW_VS_UNIT |
189		CACHE_NEW_GS_UNIT |
190		CACHE_NEW_GS_PROG |
191		CACHE_NEW_CLIP_UNIT |
192		CACHE_NEW_SF_UNIT |
193		CACHE_NEW_WM_UNIT |
194		CACHE_NEW_CC_UNIT)
195   },
196   .emit = upload_psp_urb_cbs,
197};
198
199static void prepare_depthbuffer(struct brw_context *brw)
200{
201   struct intel_context *intel = &brw->intel;
202   struct gl_context *ctx = &intel->ctx;
203   struct gl_framebuffer *fb = ctx->DrawBuffer;
204   struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
205   struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
206
207   if (drb)
208      brw_add_validated_bo(brw, drb->region->buffer);
209   if (drb && drb->hiz_region)
210      brw_add_validated_bo(brw, drb->hiz_region->buffer);
211   if (srb)
212      brw_add_validated_bo(brw, srb->region->buffer);
213}
214
215static void emit_depthbuffer(struct brw_context *brw)
216{
217   struct intel_context *intel = &brw->intel;
218   struct gl_context *ctx = &intel->ctx;
219   struct gl_framebuffer *fb = ctx->DrawBuffer;
220   /* _NEW_BUFFERS */
221   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
222   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
223   struct intel_region *hiz_region = depth_irb ? depth_irb->hiz_region : NULL;
224   unsigned int len;
225
226   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
227    * non-pipelined state that will need the PIPE_CONTROL workaround.
228    */
229   if (intel->gen == 6)
230      intel_emit_post_sync_nonzero_flush(intel);
231
232   /*
233    * If either depth or stencil buffer has packed depth/stencil format,
234    * then don't use separate stencil. Emit only a depth buffer.
235    */
236   if (depth_irb && depth_irb->Base.Format == MESA_FORMAT_S8_Z24) {
237      stencil_irb = NULL;
238   } else if (!depth_irb && stencil_irb
239	      && stencil_irb->Base.Format == MESA_FORMAT_S8_Z24) {
240      depth_irb = stencil_irb;
241      stencil_irb = NULL;
242   }
243
244   if (intel->gen >= 6)
245      len = 7;
246   else if (intel->is_g4x || intel->gen == 5)
247      len = 6;
248   else
249      len = 5;
250
251   if (!depth_irb && !stencil_irb) {
252      BEGIN_BATCH(len);
253      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
254      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
255		(BRW_SURFACE_NULL << 29));
256      OUT_BATCH(0);
257      OUT_BATCH(0);
258      OUT_BATCH(0);
259
260      if (intel->is_g4x || intel->gen >= 5)
261         OUT_BATCH(0);
262
263      if (intel->gen >= 6)
264	 OUT_BATCH(0);
265
266      ADVANCE_BATCH();
267
268   } else if (!depth_irb && stencil_irb) {
269      /*
270       * There exists a separate stencil buffer but no depth buffer.
271       *
272       * The stencil buffer inherits most of its fields from
273       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
274       * height.
275       *
276       * Since the stencil buffer has quirky pitch requirements, its region
277       * was allocated with half height and double cpp. So we need
278       * a multiplier of 2 to obtain the surface's real height.
279       *
280       * Enable the hiz bit because it and the separate stencil bit must have
281       * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
282       * 1.21 "Separate Stencil Enable":
283       *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
284       *     Enable must also be enabled.
285       *
286       *     [DevGT]: This field must be set to the same value (enabled or
287       *     disabled) as Hierarchical Depth Buffer Enable
288       */
289      assert(intel->has_separate_stencil);
290      assert(stencil_irb->Base.Format == MESA_FORMAT_S8);
291
292      BEGIN_BATCH(len);
293      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
294      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
295	        (1 << 21) | /* separate stencil enable */
296	        (1 << 22) | /* hiz enable */
297	        (BRW_TILEWALK_YMAJOR << 26) |
298	        (BRW_SURFACE_2D << 29));
299      OUT_BATCH(0);
300      OUT_BATCH(((stencil_irb->region->width - 1) << 6) |
301	         (2 * stencil_irb->region->height - 1) << 19);
302      OUT_BATCH(0);
303      OUT_BATCH(0);
304
305      if (intel->gen >= 6)
306	 OUT_BATCH(0);
307
308      ADVANCE_BATCH();
309
310   } else {
311      struct intel_region *region = depth_irb->region;
312      unsigned int format;
313      uint32_t tile_x, tile_y, offset;
314
315      /* If using separate stencil, hiz must be enabled. */
316      assert(!stencil_irb || hiz_region);
317
318      switch (region->cpp) {
319      case 2:
320	 format = BRW_DEPTHFORMAT_D16_UNORM;
321	 break;
322      case 4:
323	 if (intel->depth_buffer_is_float)
324	    format = BRW_DEPTHFORMAT_D32_FLOAT;
325	 else if (hiz_region)
326	    format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
327	 else
328	    format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
329	 break;
330      default:
331	 assert(0);
332	 return;
333      }
334
335      offset = intel_renderbuffer_tile_offsets(depth_irb, &tile_x, &tile_y);
336
337      assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
338      assert(!hiz_region || region->tiling == I915_TILING_Y);
339
340      BEGIN_BATCH(len);
341      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
342      OUT_BATCH(((region->pitch * region->cpp) - 1) |
343		(format << 18) |
344		((hiz_region ? 1 : 0) << 21) | /* separate stencil enable */
345		((hiz_region ? 1 : 0) << 22) | /* hiz enable */
346		(BRW_TILEWALK_YMAJOR << 26) |
347		((region->tiling != I915_TILING_NONE) << 27) |
348		(BRW_SURFACE_2D << 29));
349      OUT_RELOC(region->buffer,
350		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
351		offset);
352      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
353		((region->width - 1) << 6) |
354		((region->height - 1) << 19));
355      OUT_BATCH(0);
356
357      if (intel->is_g4x || intel->gen >= 5)
358         OUT_BATCH(tile_x | (tile_y << 16));
359      else
360	 assert(tile_x == 0 && tile_y == 0);
361
362      if (intel->gen >= 6)
363	 OUT_BATCH(0);
364
365      ADVANCE_BATCH();
366   }
367
368   if (hiz_region || stencil_irb) {
369      /*
370       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
371       * stencil enable' and 'hiz enable' bits were set. Therefore we must
372       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
373       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
374       * failure to do so causes hangs on gen5 and a stall on gen6.
375       */
376
377      /* Emit hiz buffer. */
378      if (hiz_region) {
379	 BEGIN_BATCH(3);
380	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
381	 OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
382	 OUT_RELOC(hiz_region->buffer,
383		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
384		   0);
385	 ADVANCE_BATCH();
386      } else {
387	 BEGIN_BATCH(3);
388	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
389	 OUT_BATCH(0);
390	 OUT_BATCH(0);
391	 ADVANCE_BATCH();
392      }
393
394      /* Emit stencil buffer. */
395      if (stencil_irb) {
396	 BEGIN_BATCH(3);
397	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
398	 OUT_BATCH(stencil_irb->region->pitch * stencil_irb->region->cpp - 1);
399	 OUT_RELOC(stencil_irb->region->buffer,
400		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
401		   0);
402	 ADVANCE_BATCH();
403      } else {
404	 BEGIN_BATCH(3);
405	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
406	 OUT_BATCH(0);
407	 OUT_BATCH(0);
408	 ADVANCE_BATCH();
409      }
410   }
411
412   /*
413    * On Gen >= 6, emit clear params for safety. If using hiz, then clear
414    * params must be emitted.
415    *
416    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
417    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
418    *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
419    */
420   if (intel->gen >= 6 || hiz_region) {
421      if (intel->gen == 6)
422	 intel_emit_post_sync_nonzero_flush(intel);
423
424      BEGIN_BATCH(2);
425      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
426      OUT_BATCH(0);
427      ADVANCE_BATCH();
428   }
429}
430
431const struct brw_tracked_state brw_depthbuffer = {
432   .dirty = {
433      .mesa = _NEW_BUFFERS,
434      .brw = BRW_NEW_BATCH,
435      .cache = 0,
436   },
437   .prepare = prepare_depthbuffer,
438   .emit = emit_depthbuffer,
439};
440
441
442
443/***********************************************************************
444 * Polygon stipple packet
445 */
446
447static void upload_polygon_stipple(struct brw_context *brw)
448{
449   struct intel_context *intel = &brw->intel;
450   struct gl_context *ctx = &brw->intel.ctx;
451   GLuint i;
452
453   if (!ctx->Polygon.StippleFlag)
454      return;
455
456   if (intel->gen == 6)
457      intel_emit_post_sync_nonzero_flush(intel);
458
459   BEGIN_BATCH(33);
460   OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));
461
462   /* Polygon stipple is provided in OpenGL order, i.e. bottom
463    * row first.  If we're rendering to a window (i.e. the
464    * default frame buffer object, 0), then we need to invert
465    * it to match our pixel layout.  But if we're rendering
466    * to a FBO (i.e. any named frame buffer object), we *don't*
467    * need to invert - we already match the layout.
468    */
469   if (ctx->DrawBuffer->Name == 0) {
470      for (i = 0; i < 32; i++)
471	  OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
472   }
473   else {
474      for (i = 0; i < 32; i++)
475	 OUT_BATCH(ctx->PolygonStipple[i]);
476   }
477   CACHED_BATCH();
478}
479
480const struct brw_tracked_state brw_polygon_stipple = {
481   .dirty = {
482      .mesa = _NEW_POLYGONSTIPPLE,
483      .brw = BRW_NEW_CONTEXT,
484      .cache = 0
485   },
486   .emit = upload_polygon_stipple
487};
488
489
490/***********************************************************************
491 * Polygon stipple offset packet
492 */
493
494static void upload_polygon_stipple_offset(struct brw_context *brw)
495{
496   struct intel_context *intel = &brw->intel;
497   struct gl_context *ctx = &brw->intel.ctx;
498
499   if (!ctx->Polygon.StippleFlag)
500      return;
501
502   if (intel->gen == 6)
503      intel_emit_post_sync_nonzero_flush(intel);
504
505   BEGIN_BATCH(2);
506   OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));
507
508   /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
509    * we have to invert the Y axis in order to match the OpenGL
510    * pixel coordinate system, and our offset must be matched
511    * to the window position.  If we're drawing to a FBO
512    * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
513    * system works just fine, and there's no window system to
514    * worry about.
515    */
516   if (brw->intel.ctx.DrawBuffer->Name == 0)
517      OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
518   else
519      OUT_BATCH(0);
520   CACHED_BATCH();
521}
522
523#define _NEW_WINDOW_POS 0x40000000
524
525const struct brw_tracked_state brw_polygon_stipple_offset = {
526   .dirty = {
527      .mesa = _NEW_WINDOW_POS | _NEW_POLYGONSTIPPLE,
528      .brw = BRW_NEW_CONTEXT,
529      .cache = 0
530   },
531   .emit = upload_polygon_stipple_offset
532};
533
534/**********************************************************************
535 * AA Line parameters
536 */
537static void upload_aa_line_parameters(struct brw_context *brw)
538{
539   struct intel_context *intel = &brw->intel;
540   struct gl_context *ctx = &brw->intel.ctx;
541
542   if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
543      return;
544
545   if (intel->gen == 6)
546      intel_emit_post_sync_nonzero_flush(intel);
547
548   OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
549   /* use legacy aa line coverage computation */
550   OUT_BATCH(0);
551   OUT_BATCH(0);
552   CACHED_BATCH();
553}
554
555const struct brw_tracked_state brw_aa_line_parameters = {
556   .dirty = {
557      .mesa = _NEW_LINE,
558      .brw = BRW_NEW_CONTEXT,
559      .cache = 0
560   },
561   .emit = upload_aa_line_parameters
562};
563
564/***********************************************************************
565 * Line stipple packet
566 */
567
568static void upload_line_stipple(struct brw_context *brw)
569{
570   struct intel_context *intel = &brw->intel;
571   struct gl_context *ctx = &brw->intel.ctx;
572   GLfloat tmp;
573   GLint tmpi;
574
575   if (!ctx->Line.StippleFlag)
576      return;
577
578   if (intel->gen == 6)
579      intel_emit_post_sync_nonzero_flush(intel);
580
581   BEGIN_BATCH(3);
582   OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
583   OUT_BATCH(ctx->Line.StipplePattern);
584   tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
585   tmpi = tmp * (1<<13);
586   OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
587   CACHED_BATCH();
588}
589
590const struct brw_tracked_state brw_line_stipple = {
591   .dirty = {
592      .mesa = _NEW_LINE,
593      .brw = BRW_NEW_CONTEXT,
594      .cache = 0
595   },
596   .emit = upload_line_stipple
597};
598
599
/***********************************************************************
 * Misc invariant state packets
 */
603
604static void upload_invarient_state( struct brw_context *brw )
605{
606   struct intel_context *intel = &brw->intel;
607
608   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
609   if (intel->gen == 6)
610      intel_emit_post_sync_nonzero_flush(intel);
611
612   /* Select the 3D pipeline (as opposed to media) */
613   BEGIN_BATCH(1);
614   OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
615   ADVANCE_BATCH();
616
617   if (intel->gen < 6) {
618      /* Disable depth offset clamping. */
619      BEGIN_BATCH(2);
620      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
621      OUT_BATCH_F(0.0);
622      ADVANCE_BATCH();
623   }
624
625   if (intel->gen >= 6) {
626      int i;
627      int len = intel->gen >= 7 ? 4 : 3;
628
629      BEGIN_BATCH(len);
630      OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2));
631      OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
632		MS_NUMSAMPLES_1);
633      OUT_BATCH(0); /* positions for 4/8-sample */
634      if (intel->gen >= 7)
635	 OUT_BATCH(0);
636      ADVANCE_BATCH();
637
638      BEGIN_BATCH(2);
639      OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
640      OUT_BATCH(1);
641      ADVANCE_BATCH();
642
643      if (intel->gen < 7) {
644	 for (i = 0; i < 4; i++) {
645	    BEGIN_BATCH(4);
646	    OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
647	    OUT_BATCH(i << SVB_INDEX_SHIFT);
648	    OUT_BATCH(0);
649	    OUT_BATCH(0xffffffff);
650	    ADVANCE_BATCH();
651	 }
652      }
653   }
654
655   BEGIN_BATCH(2);
656   OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
657   OUT_BATCH(0);
658   ADVANCE_BATCH();
659
660   BEGIN_BATCH(1);
661   OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
662	     (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
663   ADVANCE_BATCH();
664}
665
666const struct brw_tracked_state brw_invarient_state = {
667   .dirty = {
668      .mesa = 0,
669      .brw = BRW_NEW_CONTEXT,
670      .cache = 0
671   },
672   .emit = upload_invarient_state
673};
674
675/**
676 * Define the base addresses which some state is referenced from.
677 *
678 * This allows us to avoid having to emit relocations for the objects,
679 * and is actually required for binding table pointers on gen6.
680 *
681 * Surface state base address covers binding table pointers and
682 * surface state objects, but not the surfaces that the surface state
683 * objects point to.
684 */
685static void upload_state_base_address( struct brw_context *brw )
686{
687   struct intel_context *intel = &brw->intel;
688
689   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
690    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
691    * programmed prior to STATE_BASE_ADDRESS.
692    *
693    * However, given that the instruction SBA (general state base
694    * address) on this chipset is always set to 0 across X and GL,
695    * maybe this isn't required for us in particular.
696    */
697
698   if (intel->gen >= 6) {
699      if (intel->gen == 6)
700	 intel_emit_post_sync_nonzero_flush(intel);
701
702       BEGIN_BATCH(10);
703       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
704       /* General state base address: stateless DP read/write requests */
705       OUT_BATCH(1);
706       /* Surface state base address:
707	* BINDING_TABLE_STATE
708	* SURFACE_STATE
709	*/
710       OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
711        /* Dynamic state base address:
712	 * SAMPLER_STATE
713	 * SAMPLER_BORDER_COLOR_STATE
714	 * CLIP, SF, WM/CC viewport state
715	 * COLOR_CALC_STATE
716	 * DEPTH_STENCIL_STATE
717	 * BLEND_STATE
718	 * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
719	 * Disable is clear, which we rely on)
720	 */
721       OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
722				   I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
723
724       OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
725       OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
726		 1); /* Instruction base address: shader kernels (incl. SIP) */
727
728       OUT_BATCH(1); /* General state upper bound */
729       OUT_BATCH(1); /* Dynamic state upper bound */
730       OUT_BATCH(1); /* Indirect object upper bound */
731       OUT_BATCH(1); /* Instruction access upper bound */
732       ADVANCE_BATCH();
733   } else if (intel->gen == 5) {
734       BEGIN_BATCH(8);
735       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
736       OUT_BATCH(1); /* General state base address */
737       OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
738		 1); /* Surface state base address */
739       OUT_BATCH(1); /* Indirect object base address */
740       OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
741		 1); /* Instruction base address */
742       OUT_BATCH(1); /* General state upper bound */
743       OUT_BATCH(1); /* Indirect object upper bound */
744       OUT_BATCH(1); /* Instruction access upper bound */
745       ADVANCE_BATCH();
746   } else {
747       BEGIN_BATCH(6);
748       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
749       OUT_BATCH(1); /* General state base address */
750       OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
751		 1); /* Surface state base address */
752       OUT_BATCH(1); /* Indirect object base address */
753       OUT_BATCH(1); /* General state upper bound */
754       OUT_BATCH(1); /* Indirect object upper bound */
755       ADVANCE_BATCH();
756   }
757
758   /* According to section 3.6.1 of VOL1 of the 965 PRM,
759    * STATE_BASE_ADDRESS updates require a reissue of:
760    *
761    * 3DSTATE_PIPELINE_POINTERS
762    * 3DSTATE_BINDING_TABLE_POINTERS
763    * MEDIA_STATE_POINTERS
764    *
765    * and this continues through Ironlake.  The Sandy Bridge PRM, vol
766    * 1 part 1 says that the folowing packets must be reissued:
767    *
768    * 3DSTATE_CC_POINTERS
769    * 3DSTATE_BINDING_TABLE_POINTERS
770    * 3DSTATE_SAMPLER_STATE_POINTERS
771    * 3DSTATE_VIEWPORT_STATE_POINTERS
772    * MEDIA_STATE_POINTERS
773    *
774    * Those are always reissued following SBA updates anyway (new
775    * batch time), except in the case of the program cache BO
776    * changing.  Having a separate state flag makes the sequence more
777    * obvious.
778    */
779
780   brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
781}
782
783const struct brw_tracked_state brw_state_base_address = {
784   .dirty = {
785      .mesa = 0,
786      .brw = (BRW_NEW_BATCH |
787	      BRW_NEW_PROGRAM_CACHE),
788      .cache = 0,
789   },
790   .emit = upload_state_base_address
791};
792