brw_context.c revision 9308f298300beaa757194a0db8ed50924754c011
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "main/imports.h"
34#include "main/macros.h"
35#include "main/simple_list.h"
36
37#include "vbo/vbo_context.h"
38
39#include "brw_context.h"
40#include "brw_defines.h"
41#include "brw_draw.h"
42#include "brw_state.h"
43
44#include "gen6_hiz.h"
45
46#include "intel_fbo.h"
47#include "intel_mipmap_tree.h"
48#include "intel_regions.h"
49#include "intel_span.h"
50#include "intel_tex.h"
51#include "intel_tex_obj.h"
52
53#include "tnl/t_pipeline.h"
54#include "glsl/ralloc.h"
55
56/***************************************
57 * Mesa's Driver Functions
58 ***************************************/
59
60/**
61 * \brief Prepare for entry into glBegin/glEnd block.
62 *
63 * Resolve buffers before entering a glBegin/glEnd block. This is
64 * necessary to prevent recursive calls to FLUSH_VERTICES.
65 *
66 * This resolves the depth buffer of each enabled depth texture and the HiZ
67 * buffer of the attached depth renderbuffer.
68 *
69 * Details
70 * -------
71 * When vertices are queued during a glBegin/glEnd block, those vertices must
72 * be drawn before any rendering state changes. To ensure this, Mesa calls
73 * FLUSH_VERTICES as a prehook to such state changes. Therefore,
74 * FLUSH_VERTICES itself cannot change rendering state without falling into a
75 * recursive trap.
76 *
77 * This precludes meta-ops, namely buffer resolves, from occurring while any
78 * vertices are queued. To prevent that situation, we resolve some buffers on
79 * entering a glBegin/glEnd
80 *
81 * \see brwCleanupExecEnd()
82 */
83static void brwPrepareExecBegin(struct gl_context *ctx)
84{
85   struct brw_context *brw = brw_context(ctx);
86   struct intel_context *intel = &brw->intel;
87   struct intel_renderbuffer *draw_irb;
88   struct intel_texture_object *tex_obj;
89
90   if (!intel->has_hiz) {
91      /* The context uses no feature that requires buffer resolves. */
92      return;
93   }
94
95   /* Resolve each enabled texture. */
96   for (int i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
97      if (!ctx->Texture.Unit[i]._ReallyEnabled)
98	 continue;
99      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
100      if (!tex_obj || !tex_obj->mt)
101	 continue;
102      intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt);
103   }
104
105   /* Resolve the attached depth buffer. */
106   draw_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
107   if (draw_irb) {
108      intel_renderbuffer_resolve_hiz(intel, draw_irb);
109   }
110}
111
112static void brwInitDriverFunctions( struct dd_function_table *functions )
113{
114   intelInitDriverFunctions( functions );
115
116   brwInitFragProgFuncs( functions );
117   brw_init_queryobj_functions(functions);
118
119   functions->PrepareExecBegin = brwPrepareExecBegin;
120}
121
122bool
123brwCreateContext(int api,
124	         const struct gl_config *mesaVis,
125		 __DRIcontext *driContextPriv,
126	         void *sharedContextPrivate)
127{
128   struct dd_function_table functions;
129   struct brw_context *brw = rzalloc(NULL, struct brw_context);
130   struct intel_context *intel = &brw->intel;
131   struct gl_context *ctx = &intel->ctx;
132   unsigned i;
133
134   if (!brw) {
135      printf("%s: failed to alloc context\n", __FUNCTION__);
136      return false;
137   }
138
139   brwInitDriverFunctions( &functions );
140
141   if (!intelInitContext( intel, api, mesaVis, driContextPriv,
142			  sharedContextPrivate, &functions )) {
143      printf("%s: failed to init intel context\n", __FUNCTION__);
144      FREE(brw);
145      return false;
146   }
147
148   brwInitVtbl( brw );
149
150   brw_init_surface_formats(brw);
151
152   /* Initialize swrast, tnl driver tables: */
153   intelInitSpanFuncs(ctx);
154
155   TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
156
157   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
158   ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
159   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
160   ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits,
161                                     ctx->Const.MaxTextureImageUnits);
162   ctx->Const.MaxVertexTextureImageUnits = BRW_MAX_TEX_UNIT;
163   ctx->Const.MaxCombinedTextureImageUnits =
164      ctx->Const.MaxVertexTextureImageUnits +
165      ctx->Const.MaxTextureImageUnits;
166
167   ctx->Const.MaxTextureLevels = 14; /* 8192 */
168   if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
169	   ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
170   ctx->Const.Max3DTextureLevels = 9;
171   ctx->Const.MaxCubeTextureLevels = 12;
172   /* minimum maximum.  Users are likely to run into memory problems
173    * even at this size, since 64 * 2048 * 2048 * 4 = 1GB and we can't
174    * address that much.
175    */
176   ctx->Const.MaxArrayTextureLayers = 64;
177   ctx->Const.MaxTextureRectSize = (1<<12);
178
179   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
180
181   /* Hardware only supports a limited number of transform feedback buffers.
182    * So we need to override the Mesa default (which is based only on software
183    * limits).
184    */
185   ctx->Const.MaxTransformFeedbackSeparateAttribs = BRW_MAX_SOL_BUFFERS;
186
187   /* On Gen6, in the worst case, we use up one binding table entry per
188    * transform feedback component (see comments above the definition of
189    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
190    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
191    * BRW_MAX_SOL_BINDINGS.
192    *
193    * In "separate components" mode, we need to divide this value by
194    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
195    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
196    */
197   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
198   ctx->Const.MaxTransformFeedbackSeparateComponents =
199      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
200
201   /* if conformance mode is set, swrast can handle any size AA point */
202   ctx->Const.MaxPointSizeAA = 255.0;
203
204   /* We want the GLSL compiler to emit code that uses condition codes */
205   for (i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
206      ctx->ShaderCompilerOptions[i].MaxIfDepth = intel->gen < 6 ? 16 : UINT_MAX;
207      ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
208      ctx->ShaderCompilerOptions[i].EmitNVTempInitialization = true;
209      ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
210      ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
211      ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
212      ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput = true;
213
214      ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform =
215	 (i == MESA_SHADER_FRAGMENT);
216      ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
217	 (i == MESA_SHADER_FRAGMENT);
218      ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
219   }
220
221   ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024);
222   ctx->Const.VertexProgram.MaxAluInstructions = 0;
223   ctx->Const.VertexProgram.MaxTexInstructions = 0;
224   ctx->Const.VertexProgram.MaxTexIndirections = 0;
225   ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
226   ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
227   ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
228   ctx->Const.VertexProgram.MaxNativeAttribs = 16;
229   ctx->Const.VertexProgram.MaxNativeTemps = 256;
230   ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
231   ctx->Const.VertexProgram.MaxNativeParameters = 1024;
232   ctx->Const.VertexProgram.MaxEnvParams =
233      MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
234	   ctx->Const.VertexProgram.MaxEnvParams);
235
236   ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024);
237   ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024);
238   ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024);
239   ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024);
240   ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
241   ctx->Const.FragmentProgram.MaxNativeTemps = 256;
242   ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
243   ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
244   ctx->Const.FragmentProgram.MaxEnvParams =
245      MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
246	   ctx->Const.FragmentProgram.MaxEnvParams);
247
248   /* Fragment shaders use real, 32-bit twos-complement integers for all
249    * integer types.
250    */
251   ctx->Const.FragmentProgram.LowInt.RangeMin = 31;
252   ctx->Const.FragmentProgram.LowInt.RangeMax = 30;
253   ctx->Const.FragmentProgram.LowInt.Precision = 0;
254   ctx->Const.FragmentProgram.HighInt = ctx->Const.FragmentProgram.MediumInt
255      = ctx->Const.FragmentProgram.LowInt;
256
257   /* Gen6 converts quads to polygon in beginning of 3D pipeline,
258      but we're not sure how it's actually done for vertex order,
259      that affect provoking vertex decision. Always use last vertex
260      convention for quad primitive which works as expected for now. */
261   if (intel->gen >= 6)
262       ctx->Const.QuadsFollowProvokingVertexConvention = false;
263
264   if (intel->is_g4x || intel->gen >= 5) {
265      brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
266      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
267      brw->has_surface_tile_offset = true;
268      if (intel->gen < 6)
269	  brw->has_compr4 = true;
270      brw->has_aa_line_parameters = true;
271      brw->has_pln = true;
272  } else {
273      brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
274      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
275   }
276
277   /* WM maximum threads is number of EUs times number of threads per EU. */
278   if (intel->gen >= 7) {
279      if (intel->gt == 1) {
280	 brw->max_wm_threads = 86;
281	 brw->max_vs_threads = 36;
282	 brw->max_gs_threads = 36;
283	 brw->urb.size = 128;
284	 brw->urb.max_vs_entries = 512;
285	 brw->urb.max_gs_entries = 192;
286      } else if (intel->gt == 2) {
287	 brw->max_wm_threads = 86;
288	 brw->max_vs_threads = 128;
289	 brw->max_gs_threads = 128;
290	 brw->urb.size = 256;
291	 brw->urb.max_vs_entries = 704;
292	 brw->urb.max_gs_entries = 320;
293      } else {
294	 assert(!"Unknown gen7 device.");
295      }
296   } else if (intel->gen == 6) {
297      if (intel->gt == 2) {
298	 /* This could possibly be 80, but is supposed to require
299	  * disabling of WIZ hashing (bit 6 of GT_MODE, 0x20d0) and a
300	  * GPU reset to change.
301	  */
302	 brw->max_wm_threads = 40;
303	 brw->max_vs_threads = 60;
304	 brw->max_gs_threads = 60;
305	 brw->urb.size = 64;            /* volume 5c.5 section 5.1 */
306	 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
307	 brw->urb.max_gs_entries = 256;
308      } else {
309	 brw->max_wm_threads = 40;
310	 brw->max_vs_threads = 24;
311	 brw->max_gs_threads = 21; /* conservative; 24 if rendering disabled */
312	 brw->urb.size = 32;            /* volume 5c.5 section 5.1 */
313	 brw->urb.max_vs_entries = 128; /* volume 2a (see 3DSTATE_URB) */
314	 brw->urb.max_gs_entries = 256;
315      }
316      brw->urb.gen6_gs_previously_active = false;
317   } else if (intel->gen == 5) {
318      brw->urb.size = 1024;
319      brw->max_vs_threads = 72;
320      brw->max_gs_threads = 32;
321      brw->max_wm_threads = 12 * 6;
322   } else if (intel->is_g4x) {
323      brw->urb.size = 384;
324      brw->max_vs_threads = 32;
325      brw->max_gs_threads = 2;
326      brw->max_wm_threads = 10 * 5;
327   } else if (intel->gen < 6) {
328      brw->urb.size = 256;
329      brw->max_vs_threads = 16;
330      brw->max_gs_threads = 2;
331      brw->max_wm_threads = 8 * 4;
332      brw->has_negative_rhw_bug = true;
333   }
334
335   brw_init_state( brw );
336
337   brw->curbe.last_buf = calloc(1, 4096);
338   brw->curbe.next_buf = calloc(1, 4096);
339
340   brw->state.dirty.mesa = ~0;
341   brw->state.dirty.brw = ~0;
342
343   brw->emit_state_always = 0;
344
345   intel->batch.need_workaround_flush = true;
346
347   ctx->VertexProgram._MaintainTnlProgram = true;
348   ctx->FragmentProgram._MaintainTexEnvProgram = true;
349
350   brw_draw_init( brw );
351
352   brw->new_vs_backend = (getenv("INTEL_OLD_VS") == NULL);
353   brw->precompile = driQueryOptionb(&intel->optionCache, "shader_precompile");
354
355   /* If we're using the new shader backend, we require integer uniforms
356    * stored as actual integers.
357    */
358   if (brw->new_vs_backend) {
359      ctx->Const.NativeIntegers = true;
360      ctx->Const.UniformBooleanTrue = 1;
361   }
362
363   return true;
364}
365
366