brw_context.c revision a9e6a56a02155f0da5e5bfa1a4d188f3d6195066
/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
#include "main/framebuffer.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_compiler.h"
#include "brw_draw.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"
#include "util/debug.h"

/***************************************
 * Mesa's Driver Functions
 ***************************************/

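/* Fills samples[] with the MSAA sample counts supported on this hardware
 * generation, largest first, and returns the number of entries written.
 * This backs the QuerySamplesForFormat driver hook installed below, which
 * Mesa uses to answer internalformat sample-count queries.
 */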
static size_t
brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
                             GLenum internalFormat, int samples[16])
{
   struct brw_context *brw = brw_context(ctx);

   (void) target;

   switch (brw->gen) {
   case 9:
      samples[0] = 16;
      samples[1] = 8;
      samples[2] = 4;
      samples[3] = 2;
      return 4;

   case 8:
      samples[0] = 8;
      samples[1] = 4;
      samples[2] = 2;
      return 3;

   case 7:
      samples[0] = 8;
      samples[1] = 4;
      return 2;

   case 6:
      samples[0] = 4;
      return 1;

   default:
      assert(brw->gen < 6);
      samples[0] = 1;
      return 1;
   }
}

const char *const brw_vendor_string = "Intel Open Source Technology Center";

const char *
brw_get_renderer_string(unsigned deviceID)
{
   const char *chipset;
   static char buffer[128];

   switch (deviceID) {
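   /* pci_ids/i965_pci_ids.h is an X-macro table: with CHIPSET() defined as
    * below, including it expands to one "case id:" per known PCI ID,
    * mapping the device to its marketing name.
    */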
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   (void) driGetRendererString(buffer, chipset, 0);
   return buffer;
}

static const GLubyte *
intel_get_string(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);

   default:
      return NULL;
   }
}

static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;

   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      dri2InvalidateDrawable(driContext->driDrawablePriv);
      dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}

static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *tex_obj;
   struct intel_renderbuffer *depth_irb;

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);
   _vbo_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;

   _mesa_unlock_context_textures(ctx);

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb)
      intel_renderbuffer_resolve_hiz(brw, depth_irb);

   /* Resolve depth buffer and render cache of each enabled texture. */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;
      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
      intel_miptree_resolve_color(brw, tex_obj->mt);
      brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
   }

   _mesa_lock_context_textures(ctx);
}

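/* Pick whichever front-buffer flush hook the loader provides: the image
 * loader's on DRI image-based systems, otherwise the classic DRI2 loader's.
 */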
#define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)

static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both front and back buffer. It
          * is unnecessary to resolve the back, but harms nothing except
          * performance. And no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}

static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);

   brw->need_flush_throttle = true;
}

static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

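   /* Block until the GPU is done with the last batch; together with the
    * flush above, this gives glFinish() its blocking semantics.
    */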
   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}

static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);
   if (brw->gen >= 7)
      brw_init_conditional_render_functions(functions);

   functions->QuerySamplesForFormat = brw_query_samples_for_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   functions->GetTransformFeedbackVertexCount =
      brw_get_transform_feedback_vertex_count;
   if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}

static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->intelScreen->compiler;

   const bool stage_exists[MESA_SHADER_STAGES] = {
      [MESA_SHADER_VERTEX] = true,
      [MESA_SHADER_TESS_CTRL] = false,
      [MESA_SHADER_TESS_EVAL] = false,
      [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
      [MESA_SHADER_FRAGMENT] = true,
      [MESA_SHADER_COMPUTE] = _mesa_extension_override_enables.ARB_compute_shader,
   };

   unsigned num_stages = 0;
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (stage_exists[i])
         num_stages++;
   }

   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.MaxCombinedShaderOutputResources =
      MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
   ctx->Const.MaxRenderbufferSize = 8192;
   ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
   ctx->Const.MaxTextureMbytes = 1536;
   ctx->Const.MaxTextureRectSize = 1 << 12;
   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
   ctx->Const.StripTextureBorder = true;
   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MaxUniformBlockSize = 65536;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];

      if (!stage_exists[i])
         continue;

      prog->MaxTextureImageUnits = max_samplers;

      prog->MaxUniformBlocks = BRW_MAX_UBO;
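      /* Combined limit: the default uniform block plus one full block's
       * worth of components per UBO binding (block size in bytes divided
       * by 4 bytes per component).
       */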
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;

      prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      prog->MaxAtomicBuffers = BRW_MAX_ABO;
      prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
      prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
   }

   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);

   ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
   ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
   ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;


   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
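      /* msaa_modes is in decreasing order (the unclamped case above takes
       * msaa_modes[0] as the maximum), so the first mode that fits is the
       * largest one; e.g. with modes {8, 4, 0} and clamp_max_samples == 6
       * this picks 4.
       */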
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;
   ctx->Const.MaxImageSamples = 0;

   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of rectangular grid to sample numbers within a pixel.
    * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation. For more details see the comment above
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number. Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.LowerTessLevel = true;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
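   /* The RangeMin/RangeMax/Precision triples below use the
    * glGetShaderPrecisionFormat encoding: log2 magnitudes of the type's
    * range, so (31, 30, 0) describes a 32-bit two's-complement integer
    * (-2^31 .. 2^31 - 1) with no fractional bits.
    */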
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how it orders the vertices when doing so, which
    * affects the provoking vertex decision. Always use the last-vertex
    * convention for quad primitives; it works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select ~0 as the representation of a true boolean uniform, and fix
    * up the results of Gen <= 5 CMP instructions with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;

   /* From the gen4 PRM, volume 4 page 127:
    *
    *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *      the base address of the first element of the surface, computed in
    *      software by adding the surface base address to the byte offset of
    *      the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
    * writes, so there's no problem. For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously and that will
    * break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->intelScreen->compiler->glsl_compiler_options[i];
   }

   /* ARB_viewport_array */
   if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth;
   ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;
}

static void
brw_adjust_cs_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* For ES, we set these constants based on SIMD8.
    *
    * TODO: Once we can always generate SIMD16, we should update this.
    *
    * For GL, we assume we can generate a SIMD16 program, but this currently
    * is not always true. This allows us to run more test cases, and will be
    * required based on desktop GL compute shader requirements.
    */
   const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;

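   /* Each hardware thread runs simd_size invocations, so a work-group is
    * bounded by max_cs_threads * simd_size invocations, in total and per
    * dimension.  (Illustration only: 64 threads at SIMD16 would give 1024.)
    */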
   const uint32_t max_invocations = simd_size * brw->max_cs_threads;
   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
}

/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions.  No flags are set, so it makes
 * sense to continue doing that there.
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->intelScreen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   if (!driQueryOptionb(options, "hiz")) {
       brw->has_hiz = false;
       /* On gen6, you can only do separate stencil with HiZ. */
       if (brw->gen == 6)
          brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");

   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
}

GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __func__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->is_broxton = devinfo->is_broxton;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->gen >= 6) {
      gen6_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);

   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   if (brw_init_pipe_control(brw, devinfo)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_hs_threads = devinfo->max_hs_threads;
   brw->max_ds_threads = devinfo->max_ds_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   brw->max_cs_threads = devinfo->max_cs_threads;
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
   brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   brw_adjust_cs_context_constants(brw);

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
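   /* With the 256 MB estimate above, this caps mappable objects at 64 MB. */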
   brw->max_gtt_map_object_size = gtt_size / 4;

   if (brw->gen == 6)
      brw->urb.gs_present = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->sf.viewport_transform_enable = true;

   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;

   brw->use_resource_streamer = screen->has_resource_streamer &&
      (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
       env_var_as_boolean("INTEL_USE_GATHER", false));

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}

void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   /* Dump a final BMP in case the application doesn't call SwapBuffers */
   if (INTEL_DEBUG & DEBUG_AUB) {
      intel_batchbuffer_flush(brw);
      aub_dump_bmp(&brw->ctx);
   }

   _mesa_meta_free(&brw->ctx);
   brw_meta_fast_clear_free(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   drm_intel_bo_unreference(brw->curbe.curbe_bo);
   if (brw->vs.base.scratch_bo)
      drm_intel_bo_unreference(brw->vs.base.scratch_bo);
   if (brw->gs.base.scratch_bo)
      drm_intel_bo_unreference(brw->gs.base.scratch_bo);
   if (brw->wm.base.scratch_bo)
      drm_intel_bo_unreference(brw->wm.base.scratch_bo);

   gen7_reset_hw_bt_pool_offsets(brw);
   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
   brw->hw_bt_pool.bo = NULL;

   drm_intel_gem_context_destroy(brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   brw_fini_pipe_control(brw);
   intel_batchbuffer_free(brw);

   drm_intel_bo_unreference(brw->throttle_batch[1]);
   drm_intel_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}

GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
 * on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used.  Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created.  So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      if (fb->Attachment[i].Renderbuffer &&
          fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
         fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
      }
   }
}

GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
      }

      if (driReadPriv == NULL) {
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format. We must change
       * the format before the renderbuffer's miptree gets allocated, otherwise
       * the formats of the renderbuffer and its miptree will differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport. */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled. However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
         BUFFER_BACK_LEFT,
         BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      if (rb->mt->num_samples <= 1)
         intel_miptree_resolve_color(brw, rb->mt);
      else
         intel_renderbuffer_downsample(brw, rb);
   }
}

static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
       switch (buffers[i].attachment) {
       case __DRI_BUFFER_FRONT_LEFT:
           rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
           region_name = "dri2 front buffer";
           break;

       case __DRI_BUFFER_FAKE_FRONT_LEFT:
           rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
           region_name = "dri2 fake front buffer";
           break;

       case __DRI_BUFFER_BACK_LEFT:
           rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
           region_name = "dri2 back buffer";
           break;

       case __DRI_BUFFER_DEPTH:
       case __DRI_BUFFER_HIZ:
       case __DRI_BUFFER_DEPTH_STENCIL:
       case __DRI_BUFFER_STENCIL:
       case __DRI_BUFFER_ACCUM:
       default:
           fprintf(stderr,
                   "unhandled buffer attach event, attachment type %d\n",
                   buffers[i].attachment);
           return;
       }

       intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }

}

void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current read/drawbuffer
 * state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

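   /* The loader bumps drawable->dri2.stamp on invalidate events; comparing
    * it against our per-context draw/read stamps tells us whether the
    * buffers we hold may be stale and need re-querying.
    */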
   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
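   /* attachments[] is filled with (attachment token, bits-per-pixel) pairs,
    * which is why getBuffersWithFormat() below is passed i / 2 as the count.
    */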
   if ((_mesa_is_front_buffer_drawing(fb) ||
        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer.  So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer.  If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
                                                        &drawable->w,
                                                        &drawable->h,
                                                        attachments, i / 2,
                                                        buffer_count,
                                                        drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to drm_intel_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   drm_intel_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the first
    * use of a mapping of the buffer involves a bunch of page faulting which is
    * moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
       /* The bo already has a name because the miptree was created by a
        * previous call to intel_process_dri2_buffer(). If a bo already has a
        * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
        * create a new name.
        */
      drm_intel_bo_flink(last_mt->bo, &old_name);
   }

   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
                                            drawable->w, drawable->h,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   drm_intel_bo_unreference(bo);
}

/**
 * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers from
 * the image loader.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 */

static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo)
      return;

   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                            buffer->width, buffer->height,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   unsigned int format;
   uint32_t buffer_mask = 0;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   (*screen->image.loader->getBuffers) (drawable,
                                        driGLFormatToImageFormat(format),
                                        &drawable->dri2.stamp,
                                        drawable->loaderPrivate,
                                        buffer_mask,
                                        &images);

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }
   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}
1560