brw_context.c revision 848c0e72f36d0e1e460193a2d30b2f631529156f
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "main/api_exec.h"
34#include "main/imports.h"
35#include "main/macros.h"
36#include "main/points.h"
37#include "main/simple_list.h"
38#include "main/version.h"
39#include "main/vtxfmt.h"
40
41#include "vbo/vbo_context.h"
42
43#include "brw_context.h"
44#include "brw_defines.h"
45#include "brw_draw.h"
46#include "brw_state.h"
47
48#include "intel_fbo.h"
49#include "intel_mipmap_tree.h"
50#include "intel_regions.h"
51#include "intel_tex.h"
52#include "intel_tex_obj.h"
53
54#include "tnl/t_pipeline.h"
55#include "glsl/ralloc.h"
56
57/***************************************
58 * Mesa's Driver Functions
59 ***************************************/
60
61static size_t
62brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
63                             GLenum internalFormat, int samples[16])
64{
65   struct brw_context *brw = brw_context(ctx);
66
67   (void) target;
68
69   switch (brw->gen) {
70   case 7:
71      samples[0] = 8;
72      samples[1] = 4;
73      return 2;
74
75   case 6:
76      samples[0] = 4;
77      return 1;
78
79   default:
80      samples[0] = 1;
81      return 1;
82   }
83}
84
85static void brwInitDriverFunctions(struct intel_screen *screen,
86				   struct dd_function_table *functions)
87{
88   intelInitDriverFunctions( functions );
89
90   brwInitFragProgFuncs( functions );
91   brw_init_common_queryobj_functions(functions);
92   if (screen->gen >= 6)
93      gen6_init_queryobj_functions(functions);
94   else
95      gen4_init_queryobj_functions(functions);
96
97   functions->QuerySamplesForFormat = brw_query_samples_for_format;
98
99   if (screen->gen >= 7) {
100      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
101      functions->EndTransformFeedback = gen7_end_transform_feedback;
102   } else {
103      functions->BeginTransformFeedback = brw_begin_transform_feedback;
104      functions->EndTransformFeedback = brw_end_transform_feedback;
105   }
106
107   if (screen->gen >= 6)
108      functions->GetSamplePosition = gen6_get_sample_position;
109}
110
111static void
112brw_initialize_context_constants(struct brw_context *brw)
113{
114   struct gl_context *ctx = &brw->ctx;
115
116   ctx->Const.QueryCounterBits.Timestamp = 36;
117
118   ctx->Const.StripTextureBorder = true;
119
120   ctx->Const.MaxDualSourceDrawBuffers = 1;
121   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
122   ctx->Const.FragmentProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
123   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
124   ctx->Const.MaxTextureUnits =
125      MIN2(ctx->Const.MaxTextureCoordUnits,
126           ctx->Const.FragmentProgram.MaxTextureImageUnits);
127   ctx->Const.VertexProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
128   ctx->Const.MaxCombinedTextureImageUnits =
129      ctx->Const.VertexProgram.MaxTextureImageUnits +
130      ctx->Const.FragmentProgram.MaxTextureImageUnits;
131
132   ctx->Const.MaxTextureLevels = 14; /* 8192 */
133   if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
134      ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
135   ctx->Const.Max3DTextureLevels = 9;
136   ctx->Const.MaxCubeTextureLevels = 12;
137
138   if (brw->gen >= 7)
139      ctx->Const.MaxArrayTextureLayers = 2048;
140   else
141      ctx->Const.MaxArrayTextureLayers = 512;
142
143   ctx->Const.MaxTextureRectSize = 1 << 12;
144
145   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
146
147   ctx->Const.MaxRenderbufferSize = 8192;
148
149   /* Hardware only supports a limited number of transform feedback buffers.
150    * So we need to override the Mesa default (which is based only on software
151    * limits).
152    */
153   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
154
155   /* On Gen6, in the worst case, we use up one binding table entry per
156    * transform feedback component (see comments above the definition of
157    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
158    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
159    * BRW_MAX_SOL_BINDINGS.
160    *
161    * In "separate components" mode, we need to divide this value by
162    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
163    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
164    */
165   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
166   ctx->Const.MaxTransformFeedbackSeparateComponents =
167      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
168
169   if (brw->gen == 6) {
170      ctx->Const.MaxSamples = 4;
171      ctx->Const.MaxColorTextureSamples = 4;
172      ctx->Const.MaxDepthTextureSamples = 4;
173      ctx->Const.MaxIntegerSamples = 4;
174   } else if (brw->gen >= 7) {
175      ctx->Const.MaxSamples = 8;
176      ctx->Const.MaxColorTextureSamples = 8;
177      ctx->Const.MaxDepthTextureSamples = 8;
178      ctx->Const.MaxIntegerSamples = 8;
179   }
180
181   ctx->Const.MinLineWidth = 1.0;
182   ctx->Const.MinLineWidthAA = 1.0;
183   ctx->Const.MaxLineWidth = 5.0;
184   ctx->Const.MaxLineWidthAA = 5.0;
185   ctx->Const.LineWidthGranularity = 0.5;
186
187   ctx->Const.MinPointSize = 1.0;
188   ctx->Const.MinPointSizeAA = 1.0;
189   ctx->Const.MaxPointSize = 255.0;
190   ctx->Const.MaxPointSizeAA = 255.0;
191   ctx->Const.PointSizeGranularity = 1.0;
192
193   if (brw->gen >= 5 || brw->is_g4x)
194      ctx->Const.MaxClipPlanes = 8;
195
196   ctx->Const.VertexProgram.MaxNativeInstructions = 16 * 1024;
197   ctx->Const.VertexProgram.MaxAluInstructions = 0;
198   ctx->Const.VertexProgram.MaxTexInstructions = 0;
199   ctx->Const.VertexProgram.MaxTexIndirections = 0;
200   ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
201   ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
202   ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
203   ctx->Const.VertexProgram.MaxNativeAttribs = 16;
204   ctx->Const.VertexProgram.MaxNativeTemps = 256;
205   ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
206   ctx->Const.VertexProgram.MaxNativeParameters = 1024;
207   ctx->Const.VertexProgram.MaxEnvParams =
208      MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
209	   ctx->Const.VertexProgram.MaxEnvParams);
210
211   ctx->Const.FragmentProgram.MaxNativeInstructions = 1024;
212   ctx->Const.FragmentProgram.MaxNativeAluInstructions = 1024;
213   ctx->Const.FragmentProgram.MaxNativeTexInstructions = 1024;
214   ctx->Const.FragmentProgram.MaxNativeTexIndirections = 1024;
215   ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
216   ctx->Const.FragmentProgram.MaxNativeTemps = 256;
217   ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
218   ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
219   ctx->Const.FragmentProgram.MaxEnvParams =
220      MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
221	   ctx->Const.FragmentProgram.MaxEnvParams);
222
223   /* Fragment shaders use real, 32-bit twos-complement integers for all
224    * integer types.
225    */
226   ctx->Const.FragmentProgram.LowInt.RangeMin = 31;
227   ctx->Const.FragmentProgram.LowInt.RangeMax = 30;
228   ctx->Const.FragmentProgram.LowInt.Precision = 0;
229   ctx->Const.FragmentProgram.HighInt = ctx->Const.FragmentProgram.LowInt;
230   ctx->Const.FragmentProgram.MediumInt = ctx->Const.FragmentProgram.LowInt;
231
232   /* Gen6 converts quads to polygon in beginning of 3D pipeline,
233    * but we're not sure how it's actually done for vertex order,
234    * that affect provoking vertex decision. Always use last vertex
235    * convention for quad primitive which works as expected for now.
236    */
237   if (brw->gen >= 6)
238      ctx->Const.QuadsFollowProvokingVertexConvention = false;
239
240   ctx->Const.NativeIntegers = true;
241   ctx->Const.UniformBooleanTrue = 1;
242   ctx->Const.UniformBufferOffsetAlignment = 16;
243
244   ctx->Const.ForceGLSLExtensionsWarn =
245      driQueryOptionb(&brw->optionCache, "force_glsl_extensions_warn");
246
247   ctx->Const.DisableGLSLLineContinuations =
248      driQueryOptionb(&brw->optionCache, "disable_glsl_line_continuations");
249
250   if (brw->gen >= 6) {
251      ctx->Const.MaxVarying = 32;
252      ctx->Const.VertexProgram.MaxOutputComponents = 128;
253      ctx->Const.GeometryProgram.MaxInputComponents = 128;
254      ctx->Const.GeometryProgram.MaxOutputComponents = 128;
255      ctx->Const.FragmentProgram.MaxInputComponents = 128;
256   }
257
258   /* We want the GLSL compiler to emit code that uses condition codes */
259   for (int i = 0; i < MESA_SHADER_TYPES; i++) {
260      ctx->ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
261      ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
262      ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
263      ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
264      ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
265      ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput = true;
266
267      ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform =
268	 (i == MESA_SHADER_FRAGMENT);
269      ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
270	 (i == MESA_SHADER_FRAGMENT);
271      ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
272   }
273
274   ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].PreferDP4 = true;
275}
276
277bool
278brwCreateContext(int api,
279	         const struct gl_config *mesaVis,
280		 __DRIcontext *driContextPriv,
281                 unsigned major_version,
282                 unsigned minor_version,
283                 uint32_t flags,
284                 unsigned *error,
285	         void *sharedContextPrivate)
286{
287   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
288   struct intel_screen *screen = sPriv->driverPrivate;
289   struct dd_function_table functions;
290
291   struct brw_context *brw = rzalloc(NULL, struct brw_context);
292   if (!brw) {
293      printf("%s: failed to alloc context\n", __FUNCTION__);
294      *error = __DRI_CTX_ERROR_NO_MEMORY;
295      return false;
296   }
297
298   /* brwInitVtbl needs to know the chipset generation so that it can set the
299    * right pointers.
300    */
301   brw->gen = screen->gen;
302
303   brwInitVtbl( brw );
304
305   brwInitDriverFunctions(screen, &functions);
306
307   struct gl_context *ctx = &brw->ctx;
308
309   if (!intelInitContext( brw, api, major_version, minor_version,
310                          mesaVis, driContextPriv,
311			  sharedContextPrivate, &functions,
312			  error)) {
313      ralloc_free(brw);
314      return false;
315   }
316
317   brw_initialize_context_constants(brw);
318
319   /* Reinitialize the context point state.  It depends on ctx->Const values. */
320   _mesa_init_point(ctx);
321
322   if (brw->gen >= 6) {
323      /* Create a new hardware context.  Using a hardware context means that
324       * our GPU state will be saved/restored on context switch, allowing us
325       * to assume that the GPU is in the same state we left it in.
326       *
327       * This is required for transform feedback buffer offsets, query objects,
328       * and also allows us to reduce how much state we have to emit.
329       */
330      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
331
332      if (!brw->hw_ctx) {
333         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
334         ralloc_free(brw);
335         return false;
336      }
337   }
338
339   brw_init_surface_formats(brw);
340
341   /* Initialize swrast, tnl driver tables: */
342   TNLcontext *tnl = TNL_CONTEXT(ctx);
343   if (tnl)
344      tnl->Driver.RunPipeline = _tnl_run_pipeline;
345
346   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
347   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
348   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
349
350   if (brw->is_g4x || brw->gen >= 5) {
351      brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
352      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
353      brw->has_surface_tile_offset = true;
354      if (brw->gen < 6)
355	  brw->has_compr4 = true;
356      brw->has_aa_line_parameters = true;
357      brw->has_pln = true;
358  } else {
359      brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
360      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
361   }
362
363   /* WM maximum threads is number of EUs times number of threads per EU. */
364   assert(brw->gen <= 7);
365
366   if (brw->is_haswell) {
367      if (brw->gt == 1) {
368	 brw->max_wm_threads = 102;
369	 brw->max_vs_threads = 70;
370	 brw->max_gs_threads = 70;
371	 brw->urb.size = 128;
372         brw->urb.min_vs_entries = 32;
373	 brw->urb.max_vs_entries = 640;
374	 brw->urb.max_gs_entries = 256;
375      } else if (brw->gt == 2) {
376	 brw->max_wm_threads = 204;
377	 brw->max_vs_threads = 280;
378	 brw->max_gs_threads = 256;
379	 brw->urb.size = 256;
380         brw->urb.min_vs_entries = 64;
381	 brw->urb.max_vs_entries = 1664;
382	 brw->urb.max_gs_entries = 640;
383      } else if (brw->gt == 3) {
384	 brw->max_wm_threads = 408;
385	 brw->max_vs_threads = 280;
386	 brw->max_gs_threads = 256;
387	 brw->urb.size = 512;
388         brw->urb.min_vs_entries = 64;
389	 brw->urb.max_vs_entries = 1664;
390	 brw->urb.max_gs_entries = 640;
391      }
392   } else if (brw->gen == 7) {
393      if (brw->gt == 1) {
394	 brw->max_wm_threads = 48;
395	 brw->max_vs_threads = 36;
396	 brw->max_gs_threads = 36;
397	 brw->urb.size = 128;
398         brw->urb.min_vs_entries = 32;
399	 brw->urb.max_vs_entries = 512;
400	 brw->urb.max_gs_entries = 192;
401      } else if (brw->gt == 2) {
402	 brw->max_wm_threads = 172;
403	 brw->max_vs_threads = 128;
404	 brw->max_gs_threads = 128;
405	 brw->urb.size = 256;
406         brw->urb.min_vs_entries = 32;
407	 brw->urb.max_vs_entries = 704;
408	 brw->urb.max_gs_entries = 320;
409      } else {
410	 assert(!"Unknown gen7 device.");
411      }
412   } else if (brw->gen == 6) {
413      if (brw->gt == 2) {
414	 brw->max_wm_threads = 80;
415	 brw->max_vs_threads = 60;
416	 brw->max_gs_threads = 60;
417	 brw->urb.size = 64;            /* volume 5c.5 section 5.1 */
418         brw->urb.min_vs_entries = 24;
419	 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
420	 brw->urb.max_gs_entries = 256;
421      } else {
422	 brw->max_wm_threads = 40;
423	 brw->max_vs_threads = 24;
424	 brw->max_gs_threads = 21; /* conservative; 24 if rendering disabled */
425	 brw->urb.size = 32;            /* volume 5c.5 section 5.1 */
426         brw->urb.min_vs_entries = 24;
427	 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
428	 brw->urb.max_gs_entries = 256;
429      }
430      brw->urb.gen6_gs_previously_active = false;
431   } else if (brw->gen == 5) {
432      brw->urb.size = 1024;
433      brw->max_vs_threads = 72;
434      brw->max_gs_threads = 32;
435      brw->max_wm_threads = 12 * 6;
436   } else if (brw->is_g4x) {
437      brw->urb.size = 384;
438      brw->max_vs_threads = 32;
439      brw->max_gs_threads = 2;
440      brw->max_wm_threads = 10 * 5;
441   } else if (brw->gen < 6) {
442      brw->urb.size = 256;
443      brw->max_vs_threads = 16;
444      brw->max_gs_threads = 2;
445      brw->max_wm_threads = 8 * 4;
446      brw->has_negative_rhw_bug = true;
447   }
448
449   if (brw->gen <= 7) {
450      brw->needs_unlit_centroid_workaround = true;
451   }
452
453   brw->prim_restart.in_progress = false;
454   brw->prim_restart.enable_cut_index = false;
455
456   brw_init_state( brw );
457
458   if (brw->gen < 6) {
459      brw->curbe.last_buf = calloc(1, 4096);
460      brw->curbe.next_buf = calloc(1, 4096);
461   }
462
463   brw->state.dirty.mesa = ~0;
464   brw->state.dirty.brw = ~0;
465
466   /* Make sure that brw->state.dirty.brw has enough bits to hold all possible
467    * dirty flags.
468    */
469   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->state.dirty.brw));
470
471   brw->emit_state_always = 0;
472
473   brw->batch.need_workaround_flush = true;
474
475   ctx->VertexProgram._MaintainTnlProgram = true;
476   ctx->FragmentProgram._MaintainTexEnvProgram = true;
477
478   brw_draw_init( brw );
479
480   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
481   brw->disable_derivative_optimization =
482      driQueryOptionb(&brw->optionCache, "disable_derivative_optimization");
483
484   ctx->Const.ContextFlags = 0;
485   if ((flags & __DRI_CTX_FLAG_FORWARD_COMPATIBLE) != 0)
486      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT;
487
488   ctx->Debug.DebugOutput = GL_FALSE;
489   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
490      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_DEBUG_BIT;
491      ctx->Debug.DebugOutput = GL_TRUE;
492
493      /* Turn on some extra GL_ARB_debug_output generation. */
494      brw->perf_debug = true;
495   }
496
497   brw_fs_alloc_reg_sets(brw);
498   brw_vec4_alloc_reg_set(brw);
499
500   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
501      brw_init_shader_time(brw);
502
503   _mesa_compute_version(ctx);
504
505   _mesa_initialize_dispatch_tables(ctx);
506   _mesa_initialize_vbo_vtxfmt(ctx);
507
508   if (ctx->Extensions.AMD_performance_monitor) {
509      brw_init_performance_monitors(brw);
510   }
511
512   return true;
513}
514
515