brw_context.c revision 9ec246796f95996868d61ffc9b52a2c1811bb66d
/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
#include "main/framebuffer.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_compiler.h"
#include "brw_draw.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"
#include "util/debug.h"

/***************************************
 * Mesa's Driver Functions
 ***************************************/

const char *const brw_vendor_string = "Intel Open Source Technology Center";

const char *
brw_get_renderer_string(unsigned deviceID)
{
   const char *chipset;
   static char buffer[128];

   switch (deviceID) {
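   /* Expand the PCI ID table: each CHIPSET(id, symbol, str) entry in
    * i965_pci_ids.h becomes a case label mapping a device ID to its
    * renderer string.
    */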
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   (void) driGetRendererString(buffer, chipset, 0);
   return buffer;
}

static const GLubyte *
intel_get_string(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);

   default:
      return NULL;
   }
}

static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;

   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (driContext->driDrawablePriv)
         dri2InvalidateDrawable(driContext->driDrawablePriv);
      if (driContext->driReadablePriv)
         dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}

static void
intel_update_framebuffer(struct gl_context *ctx,
                         struct gl_framebuffer *fb)
{
   struct brw_context *brw = brw_context(ctx);

   /* Quantize the derived default number of samples
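    * (rounding the requested count to one the hardware actually supports).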
    */
   fb->DefaultGeometry._NumSamples =
      intel_quantize_num_samples(brw->intelScreen,
                                 fb->DefaultGeometry.NumSamples);
}

static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *tex_obj;
   struct intel_renderbuffer *depth_irb;

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);
   _vbo_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;

   _mesa_unlock_context_textures(ctx);

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb)
      intel_renderbuffer_resolve_hiz(brw, depth_irb);

   /* Resolve the depth buffer and render cache of each enabled texture. */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;
      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
      /* The sampling engine understands lossless compression, so resolving
       * those surfaces should be skipped for performance reasons.
       */
      intel_miptree_resolve_color(brw, tex_obj->mt,
                                  INTEL_MIPTREE_IGNORE_CCS_E);
      brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
   }

   /* Resolve color for each active shader image. */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      const struct gl_shader *shader = ctx->_Shader->CurrentProgram[i] ?
         ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL;

      if (unlikely(shader && shader->NumImages)) {
         for (unsigned j = 0; j < shader->NumImages; j++) {
            struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]];
            tex_obj = intel_texture_object(u->TexObj);

            if (tex_obj && tex_obj->mt) {
               /* Access to images is implemented using indirect messages
                * against the data port. Normal render target writes
                * understand lossless compression, but unfortunately the
                * typed/untyped read/write interface doesn't. Therefore
                * compressed surfaces need to be resolved prior to accessing
                * them.
                */
               intel_miptree_resolve_color(brw, tex_obj->mt, 0);
               brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
            }
         }
      }
   }

   /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of the
    * single-sampled color renderbuffers because the CCS buffer isn't
    * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
    * enabled because otherwise the surface state will be programmed with the
    * linear equivalent format anyway.
    */
   if (brw->gen >= 9 && ctx->Color.sRGBEnabled) {
      struct gl_framebuffer *fb = ctx->DrawBuffer;
      for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];

         if (rb == NULL)
            continue;

         struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         struct intel_mipmap_tree *mt = irb->mt;

         if (mt == NULL ||
             mt->num_samples > 1 ||
             _mesa_get_srgb_format_linear(mt->format) == mt->format)
            continue;

         /* Lossless compression is not supported for SRGB formats, so it
          * should be impossible to get here with such surfaces.
          */
         assert(!intel_miptree_is_lossless_compressed(brw, mt));
         intel_miptree_resolve_color(brw, mt, 0);
         brw_render_cache_set_check_flush(brw, mt->bo);
      }
   }

   _mesa_lock_context_textures(ctx);

   if (new_state & _NEW_BUFFERS) {
      intel_update_framebuffer(ctx, ctx->DrawBuffer);
      if (ctx->DrawBuffer != ctx->ReadBuffer)
         intel_update_framebuffer(ctx, ctx->ReadBuffer);
   }
}

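/* The screen's flushFrontBuffer hook lives in a different loader extension
 * depending on how the screen was initialized; pick whichever is present
 * (the image loader, otherwise the DRI2 loader).
 */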
#define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)

static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both the front and back buffers.
          * Resolving the back buffer is unnecessary, but it harms nothing
          * except performance. And no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}

static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);

   brw->need_flush_throttle = true;
}

static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}

static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);
   if (brw->gen >= 7)
      brw_init_conditional_render_functions(functions);

   functions->QueryInternalFormat = brw_query_internal_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   functions->GetTransformFeedbackVertexCount =
      brw_get_transform_feedback_vertex_count;
   if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}

static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->intelScreen->compiler;

   const bool stage_exists[MESA_SHADER_STAGES] = {
      [MESA_SHADER_VERTEX] = true,
      [MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
      [MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
      [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
      [MESA_SHADER_FRAGMENT] = true,
      [MESA_SHADER_COMPUTE] =
         (ctx->API == API_OPENGL_CORE &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
         (ctx->API == API_OPENGLES2 &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
         _mesa_extension_override_enables.ARB_compute_shader,
   };

   unsigned num_stages = 0;
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (stage_exists[i])
         num_stages++;
   }

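   /* Haswell and Gen8+ can expose more than the 16 texture/sampler units
    * that older generations advertise.
    */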
   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.MaxCombinedShaderOutputResources =
      MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
   ctx->Const.MaxRenderbufferSize = 8192;
   ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
   ctx->Const.MaxTextureMbytes = 1536;
   ctx->Const.MaxTextureRectSize = 1 << 12;
   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
   ctx->Const.StripTextureBorder = true;
   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MaxUniformBlockSize = 65536;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];

      if (!stage_exists[i])
         continue;

      prog->MaxTextureImageUnits = max_samplers;

      prog->MaxUniformBlocks = BRW_MAX_UBO;
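      /* MaxUniformBlockSize is in bytes; dividing by 4 yields the number of
       * 32-bit components each bound uniform block can contribute.
       */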
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;

      prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      prog->MaxAtomicBuffers = BRW_MAX_ABO;
      prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
      prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
   }

   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);

   ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
   ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
   ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;

   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
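    *
    * As a concrete illustration (assuming the definitions in brw_context.h
    * at this revision, BRW_MAX_SOL_BINDINGS = 64 and BRW_MAX_SOL_BUFFERS = 4),
    * this advertises 64 interleaved components and 64 / 4 = 16 separate
    * components per buffer.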
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;
   ctx->Const.MaxImageSamples = 0;

   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of a rectangular grid to sample numbers within a pixel.
    * These variables are used by the GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation. For more details see the comment above the
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number. Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
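    * For example, the Gen6+ limit of 7.375 rounds to 7.0, which stays within
    * the hardware maximum.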
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.LowerTessLevel = true;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
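    *
    * The values below use the log2 encoding that glGetShaderPrecisionFormat()
    * reports: RangeMin = 31 and RangeMax = 30 describe the interval
    * [-2^31, 2^31 - 1], with Precision = 0 fractional bits.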
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how vertex ordering is handled there, which affects
    * the provoking vertex decision. Always use the last-vertex convention
    * for quad primitives, which works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and fix
    * the results of Gen <= 5 CMP instructions with -(result & 1).
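    *
    * For example, a Gen4 CMP result of 0x00000001 becomes
    * -(0x00000001 & 1) = 0xFFFFFFFF, which matches the ~0 convention.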
    */
   ctx->Const.UniformBooleanTrue = ~0;

   /* From the gen4 PRM, volume 4 page 127:
    *
    *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *      the base address of the first element of the surface, computed in
    *      software by adding the surface base address to the byte offset of
    *      the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cache-coherent systems (our Atom CPUs). With UBOs, the GPU never
    * writes, so there's no problem. For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously, and that will
    * break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->intelScreen->compiler->glsl_compiler_options[i];
   }

   if (brw->gen >= 7) {
      ctx->Const.MaxViewportWidth = 32768;
      ctx->Const.MaxViewportHeight = 32768;
   }

   /* ARB_viewport_array */
   if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = 16384;
   ctx->Const.MaxFramebufferHeight = 16384;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;
}

static void
brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_threads)
{
   struct gl_context *ctx = &brw->ctx;

   /* For ES, we set these constants based on SIMD8.
    *
    * TODO: Once we can always generate SIMD16, we should update this.
    *
    * For GL, we assume we can generate a SIMD16 program, but this currently
    * is not always true. This allows us to run more test cases, and will be
    * required based on desktop GL compute shader requirements.
    */
   const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;

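   /* As an illustration, on a hypothetical part with max_threads = 64,
    * desktop GL would advertise 16 * 64 = 1024 invocations per work group
    * and ES would advertise 8 * 64 = 512.
    */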
   const uint32_t max_invocations = simd_size * max_threads;
   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
   ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
}

/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions.  No flags are set, so it makes
 * sense to continue doing that there.
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->intelScreen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   if (!driQueryOptionb(options, "hiz")) {
       brw->has_hiz = false;
       /* On gen6, you can only do separate stencil with HiZ. */
       if (brw->gen == 6)
          brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");

   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");

   brw->dual_color_blend_by_location =
      driQueryOptionb(options, "dual_color_blend_by_location");
}

GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __func__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->is_broxton = devinfo->is_broxton;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
   brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->gen >= 6) {
      gen6_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);

   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   if (brw_init_pipe_control(brw, devinfo)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_hs_threads = devinfo->max_hs_threads;
   brw->max_ds_threads = devinfo->max_ds_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   brw->max_cs_threads = devinfo->max_cs_threads;
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
   brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
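   /* With the 256MB estimate above, this caps mappable objects at 64MB. */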
   brw->max_gtt_map_object_size = gtt_size / 4;

   if (brw->gen == 6)
      brw->urb.gs_present = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->sf.viewport_transform_enable = true;

   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;

   brw->use_resource_streamer = screen->has_resource_streamer &&
      (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
       env_var_as_boolean("INTEL_USE_GATHER", false));

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}

void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   /* Dump a final BMP in case the application doesn't call SwapBuffers */
   if (INTEL_DEBUG & DEBUG_AUB) {
      intel_batchbuffer_flush(brw);
      aub_dump_bmp(&brw->ctx);
   }

   _mesa_meta_free(&brw->ctx);
   brw_meta_fast_clear_free(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   drm_intel_bo_unreference(brw->curbe.curbe_bo);
   if (brw->vs.base.scratch_bo)
      drm_intel_bo_unreference(brw->vs.base.scratch_bo);
   if (brw->gs.base.scratch_bo)
      drm_intel_bo_unreference(brw->gs.base.scratch_bo);
   if (brw->wm.base.scratch_bo)
      drm_intel_bo_unreference(brw->wm.base.scratch_bo);

   gen7_reset_hw_bt_pool_offsets(brw);
   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
   brw->hw_bt_pool.bo = NULL;

   drm_intel_gem_context_destroy(brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   brw_fini_pipe_control(brw);
   intel_batchbuffer_free(brw);

   drm_intel_bo_unreference(brw->throttle_batch[1]);
   drm_intel_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}

GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset the current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
 * on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used.  Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created.  So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      if (fb->Attachment[i].Renderbuffer &&
          fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
         fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
      }
   }
}

GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
      }

      if (driReadPriv == NULL) {
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format. We must change
       * the format before the renderbuffer's miptree gets allocated, otherwise
       * the formats of the renderbuffer and its miptree will differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport. */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled. However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
      BUFFER_BACK_LEFT,
      BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      if (rb->mt->num_samples <= 1)
         intel_miptree_resolve_color(brw, rb->mt, 0);
      else
         intel_renderbuffer_downsample(brw, rb);
   }
}

static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
       switch (buffers[i].attachment) {
       case __DRI_BUFFER_FRONT_LEFT:
           rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
           region_name = "dri2 front buffer";
           break;

       case __DRI_BUFFER_FAKE_FRONT_LEFT:
           rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
           region_name = "dri2 fake front buffer";
           break;

       case __DRI_BUFFER_BACK_LEFT:
           rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
           region_name = "dri2 back buffer";
           break;

       case __DRI_BUFFER_DEPTH:
       case __DRI_BUFFER_HIZ:
       case __DRI_BUFFER_DEPTH_STENCIL:
       case __DRI_BUFFER_STENCIL:
       case __DRI_BUFFER_ACCUM:
       default:
           fprintf(stderr,
                   "unhandled buffer attach event, attachment type %d\n",
                   buffers[i].attachment);
           return;
       }

       intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }
}

void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current read/draw
 * buffer state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
   if ((_mesa_is_front_buffer_drawing(fb) ||
        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer.  So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer.  If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

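   /* attachments[] holds (attachment type, bits-per-pixel) pairs, so the
    * count passed to the loader is i / 2.
    */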
   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
                                                        &drawable->w,
                                                        &drawable->h,
                                                        attachments, i / 2,
                                                        buffer_count,
                                                        drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to drm_intel_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   drm_intel_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the first
    * use of a mapping of the buffer involves a bunch of page faulting which is
    * moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer(). If a bo already has a
       * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
       * create a new name.
       */
      drm_intel_bo_flink(last_mt->bo, &old_name);
   }

   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
                                            drawable->w, drawable->h,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   drm_intel_bo_unreference(bo);
}

/**
 * \brief Query the DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers from
 * the image loader.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by the query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 */

static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo)
      return;

   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                            buffer->width, buffer->height,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   unsigned int format;
   uint32_t buffer_mask = 0;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   (*screen->image.loader->getBuffers) (drawable,
                                        driGLFormatToImageFormat(format),
                                        &drawable->dri2.stamp,
                                        drawable->loaderPrivate,
                                        buffer_mask,
                                        &images);

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }
   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}