brw_context.c revision 7b9def35835232a10010f256b9c108219f97f752
/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keithw@vmware.com>
  */


#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
#include "main/framebuffer.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_compiler.h"
#include "brw_draw.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"
#include "util/debug.h"
#include "isl/isl.h"

/***************************************
 * Mesa's Driver Functions
 ***************************************/

const char *const brw_vendor_string = "Intel Open Source Technology Center";

static const char *
get_bsw_model(const struct intel_screen *intelScreen)
{
   switch (intelScreen->eu_total) {
   case 16:
      return "405";
   case 12:
      return "400";
   default:
      return "   ";
   }
}

const char *
brw_get_renderer_string(const struct intel_screen *intelScreen)
{
   const char *chipset;
   static char buffer[128];
   char *bsw = NULL;

   switch (intelScreen->deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   /* Braswell branding is funny, so we have to fix it up here */
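   /* Illustrative walk-through (added; the exact string lives in
    * pci_ids/i965_pci_ids.h and is an assumption here): a chipset name
    * such as "Intel(R) HD Graphics XXX (Braswell)" carries an "XXX"
    * placeholder, which get_bsw_model() lets us overwrite in place with
    * a 3-character model number, e.g. "405" on a 16-EU part.
    */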
   if (intelScreen->deviceID == 0x22B1) {
      bsw = strdup(chipset);
      char *needle = strstr(bsw, "XXX");
      if (needle) {
         memcpy(needle, get_bsw_model(intelScreen), 3);
         chipset = bsw;
      }
   }

   (void) driGetRendererString(buffer, chipset, 0);
   free(bsw);
   return buffer;
}

static const GLubyte *
intel_get_string(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->intelScreen);

   default:
      return NULL;
   }
}

static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;

   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (driContext->driDrawablePriv)
         dri2InvalidateDrawable(driContext->driDrawablePriv);
      if (driContext->driReadablePriv)
         dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}

static void
intel_update_framebuffer(struct gl_context *ctx,
                         struct gl_framebuffer *fb)
{
   struct brw_context *brw = brw_context(ctx);

   /* Quantize the derived default number of samples. */
   fb->DefaultGeometry._NumSamples =
      intel_quantize_num_samples(brw->intelScreen,
                                 fb->DefaultGeometry.NumSamples);
}

/* On Gen9, color buffers may be compressed by the hardware (lossless
 * compression). There are, however, format restrictions, and care needs to
 * be taken that the sampler engine is capable of re-interpreting a buffer
 * with a format different from the one the buffer was originally written
 * with.
 *
 * For example, SRGB formats are not compressible and the sampler engine isn't
 * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying
 * color buffer needs to be resolved so that the sampling surface can be
 * sampled as non-compressed (i.e., without the auxiliary MCS buffer being
 * set).
 */
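/* A concrete instance (illustrative, added; the exact formats are
 * assumptions): a miptree written as MESA_FORMAT_R8G8B8A8_UNORM but later
 * sampled through a texture view as MESA_FORMAT_R8G8B8A8_SRGB has no
 * compressed sampling path, since the sampler cannot apply the sRGB decode
 * to CCS_E-compressed data, so the check below forces a resolve first.
 */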
static bool
intel_texture_view_requires_resolve(struct brw_context *brw,
                                    struct intel_texture_object *intel_tex)
{
   if (brw->gen < 9 ||
       !intel_miptree_is_lossless_compressed(brw, intel_tex->mt))
      return false;

   const uint32_t brw_format = brw_format_for_mesa_format(intel_tex->_Format);

   if (isl_format_supports_lossless_compression(brw->intelScreen->devinfo,
                                                brw_format))
      return false;

   perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",
              _mesa_get_format_name(intel_tex->_Format),
              _mesa_get_format_name(intel_tex->mt->format));

   return true;
}

static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *tex_obj;
   struct intel_renderbuffer *depth_irb;

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);
   _vbo_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;

   _mesa_unlock_context_textures(ctx);

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb)
      intel_renderbuffer_resolve_hiz(brw, depth_irb);

   /* Resolve depth buffer and render cache of each enabled texture. */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;
      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
      /* The sampling engine understands lossless compression, so resolving
       * those surfaces should be skipped for performance reasons.
       */
      const int flags = intel_texture_view_requires_resolve(brw, tex_obj) ?
                           0 : INTEL_MIPTREE_IGNORE_CCS_E;
      intel_miptree_resolve_color(brw, tex_obj->mt, flags);
      brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
   }

   /* Resolve color for each active shader image. */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      const struct gl_shader *shader = ctx->_Shader->CurrentProgram[i] ?
         ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL;

      if (unlikely(shader && shader->NumImages)) {
         for (unsigned j = 0; j < shader->NumImages; j++) {
            struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]];
            tex_obj = intel_texture_object(u->TexObj);

            if (tex_obj && tex_obj->mt) {
               /* Access to images is implemented using indirect messages
                * against the data port. Normal render target writes understand
                * lossless compression, but unfortunately the typed/untyped
                * read/write interface doesn't. Therefore the compressed
                * surfaces need to be resolved prior to accessing them.
                */
               intel_miptree_resolve_color(brw, tex_obj->mt, 0);
               brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
            }
         }
      }
   }

   /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of the
    * single-sampled color renderbuffers because the CCS buffer isn't
    * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
    * enabled because otherwise the surface state will be programmed with the
    * linear equivalent format anyway.
    */
   if (brw->gen >= 9 && ctx->Color.sRGBEnabled) {
      struct gl_framebuffer *fb = ctx->DrawBuffer;
      for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];

         if (rb == NULL)
            continue;

         struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         struct intel_mipmap_tree *mt = irb->mt;

         if (mt == NULL ||
             mt->num_samples > 1 ||
             _mesa_get_srgb_format_linear(mt->format) == mt->format)
            continue;

         /* Lossless compression is not supported for SRGB formats, so it
          * should be impossible to get here with such surfaces.
          */
         assert(!intel_miptree_is_lossless_compressed(brw, mt));
         intel_miptree_resolve_color(brw, mt, 0);
         brw_render_cache_set_check_flush(brw, mt->bo);
      }
   }

   _mesa_lock_context_textures(ctx);

   if (new_state & _NEW_BUFFERS) {
      intel_update_framebuffer(ctx, ctx->DrawBuffer);
      if (ctx->DrawBuffer != ctx->ReadBuffer)
         intel_update_framebuffer(ctx, ctx->ReadBuffer);
   }
}

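/* Descriptive note (added, not in the original): the screen is wired up
 * with either the newer image loader or the older DRI2 loader, and both
 * expose a flushFrontBuffer hook taking (drawable, loaderPrivate).  This
 * macro simply selects whichever one is present.
 */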
#define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)

static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both front and back buffer. It
          * is unnecessary to resolve the back, but harms nothing except
          * performance. And no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}

static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);

   brw->need_flush_throttle = true;
}

static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}

static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 8 || brw->is_haswell)
      hsw_init_queryobj_functions(functions);
   else if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);
   if (brw->gen >= 7)
      brw_init_conditional_render_functions(functions);

   functions->QueryInternalFormat = brw_query_internal_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   if (brw->intelScreen->has_mi_math_and_lrr) {
      functions->BeginTransformFeedback = hsw_begin_transform_feedback;
      functions->EndTransformFeedback = hsw_end_transform_feedback;
      functions->PauseTransformFeedback = hsw_pause_transform_feedback;
      functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
   } else if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
      functions->GetTransformFeedbackVertexCount =
         brw_get_transform_feedback_vertex_count;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}

static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->intelScreen->compiler;

   const bool stage_exists[MESA_SHADER_STAGES] = {
      [MESA_SHADER_VERTEX] = true,
      [MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
      [MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
      [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
      [MESA_SHADER_FRAGMENT] = true,
      [MESA_SHADER_COMPUTE] =
         (ctx->API == API_OPENGL_CORE &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
         (ctx->API == API_OPENGLES2 &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
         _mesa_extension_override_enables.ARB_compute_shader,
   };

   unsigned num_stages = 0;
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (stage_exists[i])
         num_stages++;
   }

   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.MaxCombinedShaderOutputResources =
      MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
   ctx->Const.MaxRenderbufferSize = 8192;
   ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
   ctx->Const.MaxTextureMbytes = 1536;
   ctx->Const.MaxTextureRectSize = 1 << 12;
   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
   ctx->Const.StripTextureBorder = true;
   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MaxUniformBlockSize = 65536;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];

      if (!stage_exists[i])
         continue;

      prog->MaxTextureImageUnits = max_samplers;

      prog->MaxUniformBlocks = BRW_MAX_UBO;
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
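      /* Worked example (added, illustrative): with the 64 kB block size
       * set above, each UBO contributes 65536 / 4 = 16384 float-sized
       * components, so the combined limit is the default uniform component
       * count plus 16384 * BRW_MAX_UBO.
       */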

      prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      prog->MaxAtomicBuffers = BRW_MAX_ABO;
      prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
      prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
   }

   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);

   ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
   ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
   ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;

   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
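   /* Sketch of the arithmetic (added; the macro values are assumptions
    * here): with 64 total SOL bindings spread over 4 SOL buffers, separate
    * mode advertises 64 / 4 = 16 components per buffer, so even fully-used
    * buffers cannot exceed the binding table budget.
    */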

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
      !brw->intelScreen->has_mi_math_and_lrr;

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }
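   /* Example (added, hypothetical values): if the hardware reports
    * {8, 4, 2, 0} and the user sets clamp_max_samples=6, the loop stops
    * at 4.  Taking the first match this way assumes that
    * intel_supported_msaa_modes() returns modes in descending order with
    * a 0 terminator, as the surrounding code implies.
    */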

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;
   ctx->Const.MaxImageSamples = 0;

   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of rectangular grid to sample numbers within a pixel.
    * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation. For more details see the comment above
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number. Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.LowerTessLevel = true;
   ctx->Const.LowerCsDerivedVariables = true;
   ctx->Const.PrimitiveRestartForPatches = true;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how that interacts with vertex order, which
    * affects the provoking vertex decision. Always use the last-vertex
    * convention for quad primitives; it works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and fix
    * up the results of Gen <= 5 CMP instructions with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;
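   /* To make that fixup concrete (added, illustrative): on Gen <= 5 a true
    * comparison may leave only the LSB set, e.g. dst = 0x00000001, and
    * -(0x00000001 & 1) == 0xFFFFFFFF, which matches the ~0 convention
    * chosen above.  Per the PRM quote, later generations already produce
    * all-ones and need no correction.
    */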

   /* From the gen4 PRM, volume 4 page 127:
    *
    *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *      the base address of the first element of the surface, computed in
    *      software by adding the surface base address to the byte offset of
    *      the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
    * writes, so there's no problem. For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously and that will
    * break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
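   /* Concrete failure mode this avoids (added, illustrative): if two SSBO
    * ranges could start 16 bytes apart, a CPU write to one and a GPU write
    * to the other could land in the same 64-byte cacheline, and whichever
    * writeback happened last would clobber the other's data on a
    * non-coherent Atom.
    */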
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->intelScreen->compiler->glsl_compiler_options[i];
   }

   if (brw->gen >= 7) {
      ctx->Const.MaxViewportWidth = 32768;
      ctx->Const.MaxViewportHeight = 32768;
   }

   /* ARB_viewport_array */
   if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = 16384;
   ctx->Const.MaxFramebufferHeight = 16384;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;
}

static void
brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_threads)
{
   struct gl_context *ctx = &brw->ctx;
   /* Maximum number of scalar compute shader invocations that can be run in
    * parallel in the same subslice assuming SIMD32 dispatch.
    */
   const uint32_t max_invocations = 32 * max_threads;
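   /* E.g. (added, hypothetical thread count): with max_threads = 64,
    * SIMD32 dispatch gives 32 * 64 = 2048 invocations, which then caps
    * both each work group dimension and the total invocation count below.
    */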
   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
   ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
}

/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions.  No flags are set, so it makes
 * sense to continue doing that there.
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->intelScreen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   if (!driQueryOptionb(options, "hiz")) {
      brw->has_hiz = false;
      /* On gen6, you can only do separate stencil with HiZ. */
      if (brw->gen == 6)
         brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");

   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");

   brw->dual_color_blend_by_location =
      driQueryOptionb(options, "dual_color_blend_by_location");
}

GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __func__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->is_broxton = devinfo->is_broxton;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
   brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->gen >= 6) {
      gen6_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);

   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   if (brw_init_pipe_control(brw, devinfo)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_hs_threads = devinfo->max_hs_threads;
   brw->max_ds_threads = devinfo->max_ds_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   /* FINISHME: Do this for all platforms that the kernel supports */
   if (brw->is_cherryview &&
       screen->subslice_total > 0 && screen->eu_total > 0) {
      /* Logical CS threads = EUs per subslice * 7 threads per EU */
      brw->max_cs_threads = screen->eu_total / screen->subslice_total * 7;
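      /* Illustrative numbers (added, not from the source): a fully enabled
       * Cherryview part with 16 EUs over 2 subslices would give
       * 16 / 2 * 7 = 56 logical compute threads.
       */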

      /* Fuse configurations may give more threads than expected, never less. */
      if (brw->max_cs_threads < devinfo->max_cs_threads)
         brw->max_cs_threads = devinfo->max_cs_threads;
   } else {
      brw->max_cs_threads = devinfo->max_cs_threads;
   }
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
   brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;
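   /* With the 256MB estimate above, that caps mappable objects at
    * 256MB / 4 = 64MB each.
    */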

   if (brw->gen == 6)
      brw->urb.gs_present = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->sf.viewport_transform_enable = true;

   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;

   brw->use_resource_streamer = screen->has_resource_streamer &&
      (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
       env_var_as_boolean("INTEL_USE_GATHER", false));

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}

void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   /* Dump a final BMP in case the application doesn't call SwapBuffers */
   if (INTEL_DEBUG & DEBUG_AUB) {
      intel_batchbuffer_flush(brw);
      aub_dump_bmp(&brw->ctx);
   }

   _mesa_meta_free(&brw->ctx);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   drm_intel_bo_unreference(brw->curbe.curbe_bo);
   if (brw->vs.base.scratch_bo)
      drm_intel_bo_unreference(brw->vs.base.scratch_bo);
   if (brw->gs.base.scratch_bo)
      drm_intel_bo_unreference(brw->gs.base.scratch_bo);
   if (brw->wm.base.scratch_bo)
      drm_intel_bo_unreference(brw->wm.base.scratch_bo);

   gen7_reset_hw_bt_pool_offsets(brw);
   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
   brw->hw_bt_pool.bo = NULL;

   drm_intel_gem_context_destroy(brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   brw_fini_pipe_control(brw);
   intel_batchbuffer_free(brw);

   drm_intel_bo_unreference(brw->throttle_batch[1]);
   drm_intel_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}

GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
 * on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used.  Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created.  So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
      if (rb)
         rb->Format = _mesa_get_srgb_format_linear(rb->Format);
   }
}

GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
      }

      if (driReadPriv == NULL) {
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format. We must change
       * the format before the renderbuffer's miptree gets allocated, otherwise
       * the formats of the renderbuffer and its miptree will differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport. */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled. However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
         BUFFER_BACK_LEFT,
         BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      if (rb->mt->num_samples <= 1)
         intel_miptree_resolve_color(brw, rb->mt, 0);
      else
         intel_renderbuffer_downsample(brw, rb);
   }
}

static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
       switch (buffers[i].attachment) {
       case __DRI_BUFFER_FRONT_LEFT:
           rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
           region_name = "dri2 front buffer";
           break;

       case __DRI_BUFFER_FAKE_FRONT_LEFT:
           rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
           region_name = "dri2 fake front buffer";
           break;

       case __DRI_BUFFER_BACK_LEFT:
           rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
           region_name = "dri2 back buffer";
           break;

       case __DRI_BUFFER_DEPTH:
       case __DRI_BUFFER_HIZ:
       case __DRI_BUFFER_DEPTH_STENCIL:
       case __DRI_BUFFER_STENCIL:
       case __DRI_BUFFER_ACCUM:
       default:
           fprintf(stderr,
                   "unhandled buffer attach event, attachment type %d\n",
                   buffers[i].attachment);
           return;
       }

       intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }
}

void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current read/drawbuffer
 * state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
   if ((_mesa_is_front_buffer_drawing(fb) ||
        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer.  So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer.  If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));
1487
1488   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
1489                                                        &drawable->w,
1490                                                        &drawable->h,
1491                                                        attachments, i / 2,
1492                                                        buffer_count,
1493                                                        drawable->loaderPrivate);
1494}
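
/* A sketch (hypothetical values) of what the query above produces for a
 * double-buffered drawable with front-buffer reading enabled: attachments
 * is packed as (attachment, bits-per-pixel) pairs, and the loader receives
 * the pair count:
 *
 *    unsigned attachments[4] = {
 *       __DRI_BUFFER_FRONT_LEFT, 32,  // intel_bits_per_pixel(front_rb)
 *       __DRI_BUFFER_BACK_LEFT,  32,  // intel_bits_per_pixel(back_rb)
 *    };
 *    getBuffersWithFormat(drawable, &w, &h, attachments, 2, &count, priv);
 */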
1495
1496/**
1497 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1498 *
1499 * This is called from intel_update_renderbuffers().
1500 *
1501 * \par Note:
1502 *    DRI buffers whose attachment point is DRI2BufferStencil or
1503 *    DRI2BufferDepthStencil are handled as special cases.
1504 *
1505 * \param buffer_name is a human-readable name, such as "dri2 front buffer",
1506 *        that is passed to drm_intel_bo_gem_create_from_name().
1507 *
1508 * \see intel_update_renderbuffers()
1509 */
1510static void
1511intel_process_dri2_buffer(struct brw_context *brw,
1512                          __DRIdrawable *drawable,
1513                          __DRIbuffer *buffer,
1514                          struct intel_renderbuffer *rb,
1515                          const char *buffer_name)
1516{
1517   struct gl_framebuffer *fb = drawable->driverPrivate;
1518   drm_intel_bo *bo;
1519
1520   if (!rb)
1521      return;
1522
1523   unsigned num_samples = rb->Base.Base.NumSamples;
1524
1525   /* We try to avoid closing and reopening the same BO name, because the
1526    * first use of a mapping of the buffer incurs a round of page faults,
1527    * which is moderately expensive.
1528    */
1529   struct intel_mipmap_tree *last_mt;
1530   if (num_samples == 0)
1531      last_mt = rb->mt;
1532   else
1533      last_mt = rb->singlesample_mt;
1534
1535   uint32_t old_name = 0;
1536   if (last_mt) {
1537      /* The bo already has a name because the miptree was created by a
1538       * previous call to intel_process_dri2_buffer(). If a bo already has a
1539       * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
1540       * create a new name.
1541       */
1542      drm_intel_bo_flink(last_mt->bo, &old_name);
1543   }
1544
1545   if (old_name == buffer->name)
1546      return;
1547
1548   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1549      fprintf(stderr,
1550              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1551              buffer->name, buffer->attachment,
1552              buffer->cpp, buffer->pitch);
1553   }
1554
1555   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1556                                          buffer->name);
1557   if (!bo) {
1558      fprintf(stderr,
1559              "Failed to open BO for returned DRI2 buffer "
1560              "(%dx%d, %s, named %d).\n"
1561              "This is likely a bug in the X Server that will lead to a "
1562              "crash soon.\n",
1563              drawable->w, drawable->h, buffer_name, buffer->name);
1564      return;
1565   }
1566
1567   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
1568                                            drawable->w, drawable->h,
1569                                            buffer->pitch);
1570
1571   if (_mesa_is_front_buffer_drawing(fb) &&
1572       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1573        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1574       rb->Base.Base.NumSamples > 1) {
1575      intel_renderbuffer_upsample(brw, rb);
1576   }
1577
1578   assert(rb->mt);
1579
1580   drm_intel_bo_unreference(bo);
1581}
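
/* The fast path above, distilled; this assumes (as the comment in the
 * function notes) that flinking an already-named BO just reads the name
 * back rather than creating a new one:
 *
 *    uint32_t old_name = 0;
 *    drm_intel_bo_flink(last_mt->bo, &old_name);  // cheap if already named
 *    if (old_name == buffer->name)
 *       return;                                   // same BO; keep the miptree
 *    bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
 *                                           buffer->name);
 */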
1582
1583/**
1584 * \brief Rebind a renderbuffer to a DRI image's buffer object.
1585 *
1586 * If the renderbuffer is not already backed by the image's BO, rebuild its
1587 * winsys miptree around that BO; for front-buffer drawing into a
1588 * multisampled renderbuffer, also upsample the BO's contents.
1589 *
1590 * This is called from intel_update_image_buffers() for each image returned
1591 * by the image loader.
1592 *
1593 * \param drawable     Drawable whose buffer is updated.
1594 * \param rb           Renderbuffer to update.
1595 * \param buffer       Image returned by the DRI image loader.
1596 * \param buffer_type  __DRI_IMAGE_BUFFER_FRONT or __DRI_IMAGE_BUFFER_BACK.
1597 */
1598
1599static void
1600intel_update_image_buffer(struct brw_context *intel,
1601                          __DRIdrawable *drawable,
1602                          struct intel_renderbuffer *rb,
1603                          __DRIimage *buffer,
1604                          enum __DRIimageBufferMask buffer_type)
1605{
1606   struct gl_framebuffer *fb = drawable->driverPrivate;
1607
1608   if (!rb || !buffer->bo)
1609      return;
1610
1611   unsigned num_samples = rb->Base.Base.NumSamples;
1612
1613   /* Check whether we're already bound to the right
1614    * buffer object.
1615    */
1616   struct intel_mipmap_tree *last_mt;
1617   if (num_samples == 0)
1618      last_mt = rb->mt;
1619   else
1620      last_mt = rb->singlesample_mt;
1621
1622   if (last_mt && last_mt->bo == buffer->bo)
1623      return;
1624
1625   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
1626                                            buffer->width, buffer->height,
1627                                            buffer->pitch);
1628
1629   if (_mesa_is_front_buffer_drawing(fb) &&
1630       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1631       rb->Base.Base.NumSamples > 1) {
1632      intel_renderbuffer_upsample(intel, rb);
1633   }
1634}
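
/* A sketch of the miptree split assumed above for multisampled winsys
 * buffers: the shared BO always holds single-sample data, so after the BO
 * is (re)bound its contents must be blitted up into the multisample
 * miptree before the app keeps drawing to the front buffer:
 *
 *    rb->singlesample_mt  // wraps the shared BO the window system sees
 *    rb->mt               // multisample miptree actually rendered to
 *    intel_renderbuffer_upsample(intel, rb);  // singlesample_mt -> mt
 */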
1635
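/**
 * \brief Query the DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which buffers to request, examine the renderbuffers attached
 * to the drawable's framebuffer, then request the buffers from the image
 * loader and rebind the renderbuffers to the returned images.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \see intel_update_renderbuffers()
 */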
1636static void
1637intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1638{
1639   struct gl_framebuffer *fb = drawable->driverPrivate;
1640   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1641   struct intel_renderbuffer *front_rb;
1642   struct intel_renderbuffer *back_rb;
1643   struct __DRIimageList images;
1644   unsigned int format;
1645   uint32_t buffer_mask = 0;
1646
1647   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1648   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1649
1650   if (back_rb)
1651      format = intel_rb_format(back_rb);
1652   else if (front_rb)
1653      format = intel_rb_format(front_rb);
1654   else
1655      return;
1656
1657   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
1658                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
1659      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1660   }
1661
1662   if (back_rb)
1663      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1664
1665   (*screen->image.loader->getBuffers) (drawable,
1666                                        driGLFormatToImageFormat(format),
1667                                        &drawable->dri2.stamp,
1668                                        drawable->loaderPrivate,
1669                                        buffer_mask,
1670                                        &images);
1671
1672   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1673      drawable->w = images.front->width;
1674      drawable->h = images.front->height;
1675      intel_update_image_buffer(brw,
1676                                drawable,
1677                                front_rb,
1678                                images.front,
1679                                __DRI_IMAGE_BUFFER_FRONT);
1680   }
1681   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1682      drawable->w = images.back->width;
1683      drawable->h = images.back->height;
1684      intel_update_image_buffer(brw,
1685                                drawable,
1686                                back_rb,
1687                                images.back,
1688                                __DRI_IMAGE_BUFFER_BACK);
1689   }
1690}
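
/* A sketch of the getBuffers handshake above: the driver requests buffers
 * with a bitmask, and the loader reports the subset it actually provided
 * in images.image_mask, so each bit is re-checked before use:
 *
 *    uint32_t request = __DRI_IMAGE_BUFFER_FRONT | __DRI_IMAGE_BUFFER_BACK;
 *    loader->getBuffers(drawable, format, &drawable->dri2.stamp,
 *                       drawable->loaderPrivate, request, &images);
 *    if (images.image_mask & __DRI_IMAGE_BUFFER_BACK)
 *       intel_update_image_buffer(brw, drawable, back_rb, images.back,
 *                                 __DRI_IMAGE_BUFFER_BACK);
 */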
1691