brw_misc_state.c revision f99d5af03b0f97d7a1b7076b2142069770879471
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33
34#include "intel_batchbuffer.h"
35#include "intel_fbo.h"
36#include "intel_mipmap_tree.h"
37#include "intel_regions.h"
38
39#include "brw_context.h"
40#include "brw_state.h"
41#include "brw_defines.h"
42
43/* Constant single cliprect for framebuffer object or DRI2 drawing */
44static void upload_drawing_rect(struct brw_context *brw)
45{
46   struct intel_context *intel = &brw->intel;
47   struct gl_context *ctx = &intel->ctx;
48
49   BEGIN_BATCH(4);
50   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
51   OUT_BATCH(0); /* xmin, ymin */
52   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
53	    ((ctx->DrawBuffer->Height - 1) << 16));
54   OUT_BATCH(0);
55   ADVANCE_BATCH();
56}
57
58const struct brw_tracked_state brw_drawing_rect = {
59   .dirty = {
60      .mesa = _NEW_BUFFERS,
61      .brw = BRW_NEW_CONTEXT,
62      .cache = 0
63   },
64   .emit = upload_drawing_rect
65};
66
67/**
68 * Upload the binding table pointers, which point each stage's array of surface
69 * state pointers.
70 *
71 * The binding table pointers are relative to the surface state base address,
72 * which points at the batchbuffer containing the streamed batch state.
73 */
74static void upload_binding_table_pointers(struct brw_context *brw)
75{
76   struct intel_context *intel = &brw->intel;
77
78   BEGIN_BATCH(6);
79   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
80   OUT_BATCH(brw->bind.bo_offset);
81   OUT_BATCH(0); /* gs */
82   OUT_BATCH(0); /* clip */
83   OUT_BATCH(0); /* sf */
84   OUT_BATCH(brw->bind.bo_offset);
85   ADVANCE_BATCH();
86}
87
88const struct brw_tracked_state brw_binding_table_pointers = {
89   .dirty = {
90      .mesa = 0,
91      .brw = (BRW_NEW_BATCH |
92	      BRW_NEW_STATE_BASE_ADDRESS |
93	      BRW_NEW_VS_BINDING_TABLE |
94	      BRW_NEW_GS_BINDING_TABLE |
95	      BRW_NEW_PS_BINDING_TABLE),
96      .cache = 0,
97   },
98   .emit = upload_binding_table_pointers,
99};
100
101/**
102 * Upload the binding table pointers, which point each stage's array of surface
103 * state pointers.
104 *
105 * The binding table pointers are relative to the surface state base address,
106 * which points at the batchbuffer containing the streamed batch state.
107 */
108static void upload_gen6_binding_table_pointers(struct brw_context *brw)
109{
110   struct intel_context *intel = &brw->intel;
111
112   BEGIN_BATCH(4);
113   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
114	     GEN6_BINDING_TABLE_MODIFY_VS |
115	     GEN6_BINDING_TABLE_MODIFY_GS |
116	     GEN6_BINDING_TABLE_MODIFY_PS |
117	     (4 - 2));
118   OUT_BATCH(brw->bind.bo_offset); /* vs */
119   OUT_BATCH(0); /* gs */
120   OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
121   ADVANCE_BATCH();
122}
123
124const struct brw_tracked_state gen6_binding_table_pointers = {
125   .dirty = {
126      .mesa = 0,
127      .brw = (BRW_NEW_BATCH |
128	      BRW_NEW_STATE_BASE_ADDRESS |
129	      BRW_NEW_VS_BINDING_TABLE |
130	      BRW_NEW_GS_BINDING_TABLE |
131	      BRW_NEW_PS_BINDING_TABLE),
132      .cache = 0,
133   },
134   .emit = upload_gen6_binding_table_pointers,
135};
136
137/**
138 * Upload pointers to the per-stage state.
139 *
140 * The state pointers in this packet are all relative to the general state
141 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
142 */
143static void upload_pipelined_state_pointers(struct brw_context *brw )
144{
145   struct intel_context *intel = &brw->intel;
146
147   if (intel->gen == 5) {
148      /* Need to flush before changing clip max threads for errata. */
149      BEGIN_BATCH(1);
150      OUT_BATCH(MI_FLUSH);
151      ADVANCE_BATCH();
152   }
153
154   BEGIN_BATCH(7);
155   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
156   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
157	     brw->vs.state_offset);
158   if (brw->gs.prog_active)
159      OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
160		brw->gs.state_offset | 1);
161   else
162      OUT_BATCH(0);
163   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
164	     brw->clip.state_offset | 1);
165   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
166	     brw->sf.state_offset);
167   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
168	     brw->wm.state_offset);
169   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
170	     brw->cc.state_offset);
171   ADVANCE_BATCH();
172
173   brw->state.dirty.brw |= BRW_NEW_PSP;
174}
175
/**
 * Emit the pipelined state pointers, then the URB fence, then the CS URB
 * state, in that fixed order.
 */
static void upload_psp_urb_cbs(struct brw_context *brw)
{
   upload_pipelined_state_pointers(brw);

   brw_upload_urb_fence(brw);

   brw_upload_cs_urb_state(brw);
}
182
183const struct brw_tracked_state brw_psp_urb_cbs = {
184   .dirty = {
185      .mesa = 0,
186      .brw = (BRW_NEW_URB_FENCE |
187	      BRW_NEW_BATCH |
188	      BRW_NEW_STATE_BASE_ADDRESS),
189      .cache = (CACHE_NEW_VS_UNIT |
190		CACHE_NEW_GS_UNIT |
191		CACHE_NEW_GS_PROG |
192		CACHE_NEW_CLIP_UNIT |
193		CACHE_NEW_SF_UNIT |
194		CACHE_NEW_WM_UNIT |
195		CACHE_NEW_CC_UNIT)
196   },
197   .emit = upload_psp_urb_cbs,
198};
199
200static void emit_depthbuffer(struct brw_context *brw)
201{
202   struct intel_context *intel = &brw->intel;
203   struct gl_context *ctx = &intel->ctx;
204   struct gl_framebuffer *fb = ctx->DrawBuffer;
205   /* _NEW_BUFFERS */
206   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
207   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
208   struct intel_region *hiz_region = NULL;
209   unsigned int len;
210
211   if (depth_irb &&
212       depth_irb->mt &&
213       depth_irb->mt->hiz_mt) {
214      hiz_region = depth_irb->mt->hiz_mt->region;
215   }
216
217   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
218    * non-pipelined state that will need the PIPE_CONTROL workaround.
219    */
220   if (intel->gen == 6) {
221      intel_emit_post_sync_nonzero_flush(intel);
222      intel_emit_depth_stall_flushes(intel);
223   }
224
225   /*
226    * If either depth or stencil buffer has packed depth/stencil format,
227    * then don't use separate stencil. Emit only a depth buffer.
228    */
229   if (depth_irb && depth_irb->Base.Format == MESA_FORMAT_S8_Z24) {
230      stencil_irb = NULL;
231   } else if (!depth_irb && stencil_irb
232	      && stencil_irb->Base.Format == MESA_FORMAT_S8_Z24) {
233      depth_irb = stencil_irb;
234      stencil_irb = NULL;
235   }
236
237   if (intel->gen >= 6)
238      len = 7;
239   else if (intel->is_g4x || intel->gen == 5)
240      len = 6;
241   else
242      len = 5;
243
244   if (!depth_irb && !stencil_irb) {
245      BEGIN_BATCH(len);
246      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
247      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
248		(BRW_SURFACE_NULL << 29));
249      OUT_BATCH(0);
250      OUT_BATCH(0);
251      OUT_BATCH(0);
252
253      if (intel->is_g4x || intel->gen >= 5)
254         OUT_BATCH(0);
255
256      if (intel->gen >= 6)
257	 OUT_BATCH(0);
258
259      ADVANCE_BATCH();
260
261   } else if (!depth_irb && stencil_irb) {
262      /*
263       * There exists a separate stencil buffer but no depth buffer.
264       *
265       * The stencil buffer inherits most of its fields from
266       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
267       * height.
268       *
269       * Since the stencil buffer has quirky pitch requirements, its region
270       * was allocated with half height and double cpp. So we need
271       * a multiplier of 2 to obtain the surface's real height.
272       *
273       * Enable the hiz bit because it and the separate stencil bit must have
274       * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
275       * 1.21 "Separate Stencil Enable":
276       *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
277       *     Enable must also be enabled.
278       *
279       *     [DevGT]: This field must be set to the same value (enabled or
280       *     disabled) as Hierarchical Depth Buffer Enable
281       *
282       * The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1,
283       * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
284       *     [DevGT+]: This field must be set to TRUE.
285       */
286      struct intel_region *region = stencil_irb->mt->region;
287
288      assert(intel->has_separate_stencil);
289      assert(stencil_irb->Base.Format == MESA_FORMAT_S8);
290
291      BEGIN_BATCH(len);
292      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
293      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
294	        (1 << 21) | /* separate stencil enable */
295	        (1 << 22) | /* hiz enable */
296	        (BRW_TILEWALK_YMAJOR << 26) |
297	        (1 << 27) | /* tiled surface */
298	        (BRW_SURFACE_2D << 29));
299      OUT_BATCH(0);
300      OUT_BATCH(((region->width - 1) << 6) |
301	         (2 * region->height - 1) << 19);
302      OUT_BATCH(0);
303      OUT_BATCH(0);
304
305      if (intel->gen >= 6)
306	 OUT_BATCH(0);
307
308      ADVANCE_BATCH();
309
310   } else {
311      struct intel_region *region = depth_irb->mt->region;
312      unsigned int format;
313      uint32_t tile_x, tile_y, offset;
314
315      /* If using separate stencil, hiz must be enabled. */
316      assert(!stencil_irb || hiz_region);
317
318      switch (region->cpp) {
319      case 2:
320	 format = BRW_DEPTHFORMAT_D16_UNORM;
321	 break;
322      case 4:
323	 if (intel->depth_buffer_is_float)
324	    format = BRW_DEPTHFORMAT_D32_FLOAT;
325	 else if (hiz_region)
326	    format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
327	 else
328	    format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
329	 break;
330      default:
331	 assert(0);
332	 return;
333      }
334
335      offset = intel_renderbuffer_tile_offsets(depth_irb, &tile_x, &tile_y);
336
337      assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
338      assert(!hiz_region || region->tiling == I915_TILING_Y);
339
340      BEGIN_BATCH(len);
341      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
342      OUT_BATCH(((region->pitch * region->cpp) - 1) |
343		(format << 18) |
344		((hiz_region ? 1 : 0) << 21) | /* separate stencil enable */
345		((hiz_region ? 1 : 0) << 22) | /* hiz enable */
346		(BRW_TILEWALK_YMAJOR << 26) |
347		((region->tiling != I915_TILING_NONE) << 27) |
348		(BRW_SURFACE_2D << 29));
349      OUT_RELOC(region->bo,
350		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
351		offset);
352      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
353		((region->width - 1) << 6) |
354		((region->height - 1) << 19));
355      OUT_BATCH(0);
356
357      if (intel->is_g4x || intel->gen >= 5)
358         OUT_BATCH(tile_x | (tile_y << 16));
359      else
360	 assert(tile_x == 0 && tile_y == 0);
361
362      if (intel->gen >= 6)
363	 OUT_BATCH(0);
364
365      ADVANCE_BATCH();
366   }
367
368   if (hiz_region || stencil_irb) {
369      /*
370       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
371       * stencil enable' and 'hiz enable' bits were set. Therefore we must
372       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
373       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
374       * failure to do so causes hangs on gen5 and a stall on gen6.
375       */
376
377      /* Emit hiz buffer. */
378      if (hiz_region) {
379	 BEGIN_BATCH(3);
380	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
381	 OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
382	 OUT_RELOC(hiz_region->bo,
383		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
384		   0);
385	 ADVANCE_BATCH();
386      } else {
387	 BEGIN_BATCH(3);
388	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
389	 OUT_BATCH(0);
390	 OUT_BATCH(0);
391	 ADVANCE_BATCH();
392      }
393
394      /* Emit stencil buffer. */
395      if (stencil_irb) {
396	 struct intel_region *region = stencil_irb->mt->region;
397	 BEGIN_BATCH(3);
398	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
399	 OUT_BATCH(region->pitch * region->cpp - 1);
400	 OUT_RELOC(region->bo,
401		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
402		   0);
403	 ADVANCE_BATCH();
404      } else {
405	 BEGIN_BATCH(3);
406	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
407	 OUT_BATCH(0);
408	 OUT_BATCH(0);
409	 ADVANCE_BATCH();
410      }
411   }
412
413   /*
414    * On Gen >= 6, emit clear params for safety. If using hiz, then clear
415    * params must be emitted.
416    *
417    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
418    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
419    *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
420    */
421   if (intel->gen >= 6 || hiz_region) {
422      if (intel->gen == 6)
423	 intel_emit_post_sync_nonzero_flush(intel);
424
425      BEGIN_BATCH(2);
426      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
427      OUT_BATCH(0);
428      ADVANCE_BATCH();
429   }
430}
431
432const struct brw_tracked_state brw_depthbuffer = {
433   .dirty = {
434      .mesa = _NEW_BUFFERS,
435      .brw = BRW_NEW_BATCH,
436      .cache = 0,
437   },
438   .emit = emit_depthbuffer,
439};
440
441
442
443/***********************************************************************
444 * Polygon stipple packet
445 */
446
447static void upload_polygon_stipple(struct brw_context *brw)
448{
449   struct intel_context *intel = &brw->intel;
450   struct gl_context *ctx = &brw->intel.ctx;
451   GLuint i;
452
453   /* _NEW_POLYGON */
454   if (!ctx->Polygon.StippleFlag)
455      return;
456
457   if (intel->gen == 6)
458      intel_emit_post_sync_nonzero_flush(intel);
459
460   BEGIN_BATCH(33);
461   OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));
462
463   /* Polygon stipple is provided in OpenGL order, i.e. bottom
464    * row first.  If we're rendering to a window (i.e. the
465    * default frame buffer object, 0), then we need to invert
466    * it to match our pixel layout.  But if we're rendering
467    * to a FBO (i.e. any named frame buffer object), we *don't*
468    * need to invert - we already match the layout.
469    */
470   if (ctx->DrawBuffer->Name == 0) {
471      for (i = 0; i < 32; i++)
472	  OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
473   }
474   else {
475      for (i = 0; i < 32; i++)
476	 OUT_BATCH(ctx->PolygonStipple[i]);
477   }
478   CACHED_BATCH();
479}
480
481const struct brw_tracked_state brw_polygon_stipple = {
482   .dirty = {
483      .mesa = (_NEW_POLYGONSTIPPLE |
484	       _NEW_POLYGON),
485      .brw = BRW_NEW_CONTEXT,
486      .cache = 0
487   },
488   .emit = upload_polygon_stipple
489};
490
491
492/***********************************************************************
493 * Polygon stipple offset packet
494 */
495
496static void upload_polygon_stipple_offset(struct brw_context *brw)
497{
498   struct intel_context *intel = &brw->intel;
499   struct gl_context *ctx = &brw->intel.ctx;
500
501   /* _NEW_POLYGON */
502   if (!ctx->Polygon.StippleFlag)
503      return;
504
505   if (intel->gen == 6)
506      intel_emit_post_sync_nonzero_flush(intel);
507
508   BEGIN_BATCH(2);
509   OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));
510
511   /* _NEW_BUFFERS
512    *
513    * If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
514    * we have to invert the Y axis in order to match the OpenGL
515    * pixel coordinate system, and our offset must be matched
516    * to the window position.  If we're drawing to a FBO
517    * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
518    * system works just fine, and there's no window system to
519    * worry about.
520    */
521   if (brw->intel.ctx.DrawBuffer->Name == 0)
522      OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
523   else
524      OUT_BATCH(0);
525   CACHED_BATCH();
526}
527
528const struct brw_tracked_state brw_polygon_stipple_offset = {
529   .dirty = {
530      .mesa = (_NEW_BUFFERS |
531	       _NEW_POLYGON),
532      .brw = BRW_NEW_CONTEXT,
533      .cache = 0
534   },
535   .emit = upload_polygon_stipple_offset
536};
537
538/**********************************************************************
539 * AA Line parameters
540 */
541static void upload_aa_line_parameters(struct brw_context *brw)
542{
543   struct intel_context *intel = &brw->intel;
544   struct gl_context *ctx = &brw->intel.ctx;
545
546   if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
547      return;
548
549   if (intel->gen == 6)
550      intel_emit_post_sync_nonzero_flush(intel);
551
552   OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
553   /* use legacy aa line coverage computation */
554   OUT_BATCH(0);
555   OUT_BATCH(0);
556   CACHED_BATCH();
557}
558
559const struct brw_tracked_state brw_aa_line_parameters = {
560   .dirty = {
561      .mesa = _NEW_LINE,
562      .brw = BRW_NEW_CONTEXT,
563      .cache = 0
564   },
565   .emit = upload_aa_line_parameters
566};
567
568/***********************************************************************
569 * Line stipple packet
570 */
571
572static void upload_line_stipple(struct brw_context *brw)
573{
574   struct intel_context *intel = &brw->intel;
575   struct gl_context *ctx = &brw->intel.ctx;
576   GLfloat tmp;
577   GLint tmpi;
578
579   if (!ctx->Line.StippleFlag)
580      return;
581
582   if (intel->gen == 6)
583      intel_emit_post_sync_nonzero_flush(intel);
584
585   BEGIN_BATCH(3);
586   OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
587   OUT_BATCH(ctx->Line.StipplePattern);
588   tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
589   tmpi = tmp * (1<<13);
590   OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
591   CACHED_BATCH();
592}
593
594const struct brw_tracked_state brw_line_stipple = {
595   .dirty = {
596      .mesa = _NEW_LINE,
597      .brw = BRW_NEW_CONTEXT,
598      .cache = 0
599   },
600   .emit = upload_line_stipple
601};
602
603
604/***********************************************************************
 * Misc invariant state packets
606 */
607
608static void upload_invarient_state( struct brw_context *brw )
609{
610   struct intel_context *intel = &brw->intel;
611
612   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
613   if (intel->gen == 6)
614      intel_emit_post_sync_nonzero_flush(intel);
615
616   /* Select the 3D pipeline (as opposed to media) */
617   BEGIN_BATCH(1);
618   OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
619   ADVANCE_BATCH();
620
621   if (intel->gen < 6) {
622      /* Disable depth offset clamping. */
623      BEGIN_BATCH(2);
624      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
625      OUT_BATCH_F(0.0);
626      ADVANCE_BATCH();
627   }
628
629   if (intel->gen >= 6) {
630      int i;
631      int len = intel->gen >= 7 ? 4 : 3;
632
633      BEGIN_BATCH(len);
634      OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2));
635      OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
636		MS_NUMSAMPLES_1);
637      OUT_BATCH(0); /* positions for 4/8-sample */
638      if (intel->gen >= 7)
639	 OUT_BATCH(0);
640      ADVANCE_BATCH();
641
642      BEGIN_BATCH(2);
643      OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
644      OUT_BATCH(1);
645      ADVANCE_BATCH();
646
647      if (intel->gen < 7) {
648	 for (i = 0; i < 4; i++) {
649	    BEGIN_BATCH(4);
650	    OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
651	    OUT_BATCH(i << SVB_INDEX_SHIFT);
652	    OUT_BATCH(0);
653	    OUT_BATCH(0xffffffff);
654	    ADVANCE_BATCH();
655	 }
656      }
657   }
658
659   BEGIN_BATCH(2);
660   OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
661   OUT_BATCH(0);
662   ADVANCE_BATCH();
663
664   BEGIN_BATCH(1);
665   OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
666	     (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
667   ADVANCE_BATCH();
668}
669
670const struct brw_tracked_state brw_invarient_state = {
671   .dirty = {
672      .mesa = 0,
673      .brw = BRW_NEW_CONTEXT,
674      .cache = 0
675   },
676   .emit = upload_invarient_state
677};
678
679/**
680 * Define the base addresses which some state is referenced from.
681 *
682 * This allows us to avoid having to emit relocations for the objects,
683 * and is actually required for binding table pointers on gen6.
684 *
685 * Surface state base address covers binding table pointers and
686 * surface state objects, but not the surfaces that the surface state
687 * objects point to.
688 */
689static void upload_state_base_address( struct brw_context *brw )
690{
691   struct intel_context *intel = &brw->intel;
692
693   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
694    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
695    * programmed prior to STATE_BASE_ADDRESS.
696    *
697    * However, given that the instruction SBA (general state base
698    * address) on this chipset is always set to 0 across X and GL,
699    * maybe this isn't required for us in particular.
700    */
701
702   if (intel->gen >= 6) {
703      if (intel->gen == 6)
704	 intel_emit_post_sync_nonzero_flush(intel);
705
706       BEGIN_BATCH(10);
707       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
708       /* General state base address: stateless DP read/write requests */
709       OUT_BATCH(1);
710       /* Surface state base address:
711	* BINDING_TABLE_STATE
712	* SURFACE_STATE
713	*/
714       OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
715        /* Dynamic state base address:
716	 * SAMPLER_STATE
717	 * SAMPLER_BORDER_COLOR_STATE
718	 * CLIP, SF, WM/CC viewport state
719	 * COLOR_CALC_STATE
720	 * DEPTH_STENCIL_STATE
721	 * BLEND_STATE
722	 * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
723	 * Disable is clear, which we rely on)
724	 */
725       OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
726				   I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
727
728       OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
729       OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
730		 1); /* Instruction base address: shader kernels (incl. SIP) */
731
732       OUT_BATCH(1); /* General state upper bound */
733       OUT_BATCH(1); /* Dynamic state upper bound */
734       OUT_BATCH(1); /* Indirect object upper bound */
735       OUT_BATCH(1); /* Instruction access upper bound */
736       ADVANCE_BATCH();
737   } else if (intel->gen == 5) {
738       BEGIN_BATCH(8);
739       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
740       OUT_BATCH(1); /* General state base address */
741       OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
742		 1); /* Surface state base address */
743       OUT_BATCH(1); /* Indirect object base address */
744       OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
745		 1); /* Instruction base address */
746       OUT_BATCH(1); /* General state upper bound */
747       OUT_BATCH(1); /* Indirect object upper bound */
748       OUT_BATCH(1); /* Instruction access upper bound */
749       ADVANCE_BATCH();
750   } else {
751       BEGIN_BATCH(6);
752       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
753       OUT_BATCH(1); /* General state base address */
754       OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
755		 1); /* Surface state base address */
756       OUT_BATCH(1); /* Indirect object base address */
757       OUT_BATCH(1); /* General state upper bound */
758       OUT_BATCH(1); /* Indirect object upper bound */
759       ADVANCE_BATCH();
760   }
761
762   /* According to section 3.6.1 of VOL1 of the 965 PRM,
763    * STATE_BASE_ADDRESS updates require a reissue of:
764    *
765    * 3DSTATE_PIPELINE_POINTERS
766    * 3DSTATE_BINDING_TABLE_POINTERS
767    * MEDIA_STATE_POINTERS
768    *
769    * and this continues through Ironlake.  The Sandy Bridge PRM, vol
770    * 1 part 1 says that the folowing packets must be reissued:
771    *
772    * 3DSTATE_CC_POINTERS
773    * 3DSTATE_BINDING_TABLE_POINTERS
774    * 3DSTATE_SAMPLER_STATE_POINTERS
775    * 3DSTATE_VIEWPORT_STATE_POINTERS
776    * MEDIA_STATE_POINTERS
777    *
778    * Those are always reissued following SBA updates anyway (new
779    * batch time), except in the case of the program cache BO
780    * changing.  Having a separate state flag makes the sequence more
781    * obvious.
782    */
783
784   brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
785}
786
787const struct brw_tracked_state brw_state_base_address = {
788   .dirty = {
789      .mesa = 0,
790      .brw = (BRW_NEW_BATCH |
791	      BRW_NEW_PROGRAM_CACHE),
792      .cache = 0,
793   },
794   .emit = upload_state_base_address
795};
796