brw_context.c revision 3dc3dbc8d826255d60e2aca8822b77619ace206a
/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
#include "main/framebuffer.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_compiler.h"
#include "brw_draw.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"
#include "util/debug.h"

/***************************************
 * Mesa's Driver Functions
 ***************************************/

const char *const brw_vendor_string = "Intel Open Source Technology Center";

const char *
brw_get_renderer_string(unsigned deviceID)
{
   const char *chipset;
   static char buffer[128];

   switch (deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   (void) driGetRendererString(buffer, chipset, 0);
   return buffer;
}

static const GLubyte *
intel_get_string(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);

   default:
      return NULL;
   }
}

static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
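
   /* Descriptive note: this callback is only installed when the loader lacks
    * DRI2 invalidate event support (see brw_init_driver_functions), so a
    * glViewport() call is our hint that the window may have been resized;
    * invalidate the drawables so fresh buffers are fetched on the next draw.
    */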
   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (driContext->driDrawablePriv)
         dri2InvalidateDrawable(driContext->driDrawablePriv);
      if (driContext->driReadablePriv)
         dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}

static void
intel_update_framebuffer(struct gl_context *ctx,
                         struct gl_framebuffer *fb)
{
   struct brw_context *brw = brw_context(ctx);

   /* Quantize the derived default number of samples.
    */
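   /* intel_quantize_num_samples() rounds the request up to the nearest
    * sample count the hardware supports; e.g. a request for 5 samples
    * becomes 8 on hardware with an 8x MSAA mode.
    */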
   fb->DefaultGeometry._NumSamples =
      intel_quantize_num_samples(brw->intelScreen,
                                 fb->DefaultGeometry.NumSamples);
}

static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *tex_obj;
   struct intel_renderbuffer *depth_irb;

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);
   _vbo_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;

   _mesa_unlock_context_textures(ctx);

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb)
      intel_renderbuffer_resolve_hiz(brw, depth_irb);

   /* Resolve depth buffer and render cache of each enabled texture. */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;
      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
      /* The sampling engine understands lossless compression, so resolving
       * those surfaces should be skipped for performance reasons.
       */
      intel_miptree_resolve_color(brw, tex_obj->mt,
                                  INTEL_MIPTREE_IGNORE_CCS_E);
      brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
   }

   /* Resolve color for each active shader image. */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      const struct gl_shader *shader = ctx->_Shader->CurrentProgram[i] ?
         ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL;

      if (unlikely(shader && shader->NumImages)) {
         for (unsigned j = 0; j < shader->NumImages; j++) {
            struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]];
            tex_obj = intel_texture_object(u->TexObj);

            if (tex_obj && tex_obj->mt) {
               /* Access to images is implemented using indirect messages
                * against the data port.  Normal render target writes
                * understand lossless compression, but unfortunately the
                * typed/untyped read/write interface doesn't.  Therefore
                * compressed surfaces need to be resolved prior to accessing
                * them.
                */
               intel_miptree_resolve_color(brw, tex_obj->mt, 0);
               brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
            }
         }
      }
   }

   /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of
    * the single-sampled color renderbuffers because the CCS buffer isn't
    * supported for SRGB formats.  This only matters if FRAMEBUFFER_SRGB is
    * enabled because otherwise the surface state will be programmed with
    * the linear equivalent format anyway.
    */
   if (brw->gen >= 9 && ctx->Color.sRGBEnabled) {
      struct gl_framebuffer *fb = ctx->DrawBuffer;
      for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];

         if (rb == NULL)
            continue;

         struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         struct intel_mipmap_tree *mt = irb->mt;

         if (mt == NULL ||
             mt->num_samples > 1 ||
             _mesa_get_srgb_format_linear(mt->format) == mt->format)
            continue;

         /* Lossless compression is not supported for SRGB formats, so it
          * should be impossible to get here with such surfaces.
          */
         assert(!intel_miptree_is_lossless_compressed(brw, mt));
         intel_miptree_resolve_color(brw, mt, 0);
         brw_render_cache_set_check_flush(brw, mt->bo);
      }
   }

   _mesa_lock_context_textures(ctx);

   if (new_state & _NEW_BUFFERS) {
      intel_update_framebuffer(ctx, ctx->DrawBuffer);
      if (ctx->DrawBuffer != ctx->ReadBuffer)
         intel_update_framebuffer(ctx, ctx->ReadBuffer);
   }
}

#define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)

static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both front and back buffer.  It
          * is unnecessary to resolve the back, but harms nothing except
          * performance.  And no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}

static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);

   brw->need_flush_throttle = true;
}

static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}

static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);
   if (brw->gen >= 7)
      brw_init_conditional_render_functions(functions);

   functions->QueryInternalFormat = brw_query_internal_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   functions->GetTransformFeedbackVertexCount =
      brw_get_transform_feedback_vertex_count;
   if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}

static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->intelScreen->compiler;

   const bool stage_exists[MESA_SHADER_STAGES] = {
      [MESA_SHADER_VERTEX] = true,
      [MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
      [MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
      [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
      [MESA_SHADER_FRAGMENT] = true,
      [MESA_SHADER_COMPUTE] =
         (ctx->API == API_OPENGL_CORE &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
         (ctx->API == API_OPENGLES2 &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
         _mesa_extension_override_enables.ARB_compute_shader,
   };

   unsigned num_stages = 0;
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (stage_exists[i])
         num_stages++;
   }

   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.MaxCombinedShaderOutputResources =
      MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
   ctx->Const.MaxRenderbufferSize = 8192;
   ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
   ctx->Const.MaxTextureMbytes = 1536;
   ctx->Const.MaxTextureRectSize = 1 << 12;
   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
   ctx->Const.StripTextureBorder = true;
   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MaxUniformBlockSize = 65536;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];

      if (!stage_exists[i])
         continue;

      prog->MaxTextureImageUnits = max_samplers;

      prog->MaxUniformBlocks = BRW_MAX_UBO;
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;

      prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      prog->MaxAtomicBuffers = BRW_MAX_ABO;
      prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
      prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
   }

   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);

   ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
   ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
   ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;


   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on
    * software limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a
    * value for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
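
   /* At this revision, brw_context.h defines BRW_MAX_SOL_BINDINGS as 64 and
    * BRW_MAX_SOL_BUFFERS as 4, so the division above advertises 16 separate
    * components per buffer.
    */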

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");
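
   /* intel_supported_msaa_modes() reports the supported sample counts in
    * descending order, so msaa_modes[0] below is the hardware maximum, and
    * the first mode that does not exceed clamp_max_samples is the largest
    * usable one.
    */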

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;
   ctx->Const.MaxImageSamples = 0;

   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of a rectangular grid to sample numbers within a pixel.
    * These variables are used by the GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation.  For more details see the comment above the
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number.  Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.LowerTessLevel = true;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt =
      ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt =
      ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt =
      ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt =
      ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how the vertex order is handled, which affects the
    * provoking vertex decision.  Always use the last-vertex convention for
    * quad primitives, which works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and fix
    * the results of Gen <= 5 CMP instructions with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;
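
   /* Worked example of that fixup: a Gen4-5 CMP leaves 0x1 in a true
    * channel, and -(0x1 & 1) == 0xFFFFFFFF == ~0 in 32-bit two's complement,
    * matching the representation chosen above.
    */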

   /* From the gen4 PRM, volume 4 page 127:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *     the base address of the first element of the surface, computed in
    *     software by adding the surface base address to the byte offset of
    *     the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cache-coherent systems (our Atom CPUs).  With UBOs, the GPU never
    * writes, so there's no problem.  For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously, and that
    * will break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->intelScreen->compiler->glsl_compiler_options[i];
   }

   if (brw->gen >= 7) {
      ctx->Const.MaxViewportWidth = 32768;
      ctx->Const.MaxViewportHeight = 32768;
   }

   /* ARB_viewport_array */
   if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = 16384;
   ctx->Const.MaxFramebufferHeight = 16384;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;
}

static void
brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_threads)
{
   struct gl_context *ctx = &brw->ctx;

   /* For ES, we set these constants based on SIMD8.
    *
    * TODO: Once we can always generate SIMD16, we should update this.
    *
    * For GL, we assume we can generate a SIMD16 program, but this currently
    * is not always true.  This allows us to run more test cases, and will be
    * required based on desktop GL compute shader requirements.
    */
   const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;

   const uint32_t max_invocations = simd_size * max_threads;
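   /* Example with illustrative numbers: max_threads = 64 and a SIMD16
    * program would give max_invocations = 16 * 64 = 1024 invocations per
    * work group.
    */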
   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
   ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
}

/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions.  No flags are set, so it makes
 * sense to continue doing that there.
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->intelScreen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   if (!driQueryOptionb(options, "hiz")) {
      brw->has_hiz = false;
      /* On gen6, you can only do separate stencil with HiZ. */
      if (brw->gen == 6)
         brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");

   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");

   brw->dual_color_blend_by_location =
      driQueryOptionb(options, "dual_color_blend_by_location");
}

GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
                          | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __func__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->is_broxton = devinfo->is_broxton;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
   brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->gen >= 6) {
      gen6_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);

   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query
       * objects, and also allows us to reduce how much state we have to
       * emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   if (brw_init_pipe_control(brw, devinfo)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_hs_threads = devinfo->max_hs_threads;
   brw->max_ds_threads = devinfo->max_ds_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   /* FINISHME: Do this for all platforms that the kernel supports */
   if (brw->is_cherryview &&
       screen->subslice_total > 0 && screen->eu_total > 0) {
      /* Logical CS threads = EUs per subslice * 7 threads per EU */
      brw->max_cs_threads = screen->eu_total / screen->subslice_total * 7;

      /* Fuse configurations may give more threads than expected, never less. */
      if (brw->max_cs_threads < devinfo->max_cs_threads)
         brw->max_cs_threads = devinfo->max_cs_threads;
   } else {
      brw->max_cs_threads = devinfo->max_cs_threads;
   }
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
   brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;
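
   /* With the 256MB estimate above, that caps mappable objects at 64MB. */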

   if (brw->gen == 6)
      brw->urb.gs_present = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->sf.viewport_transform_enable = true;

   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;

   brw->use_resource_streamer = screen->has_resource_streamer &&
      (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
       env_var_as_boolean("INTEL_USE_GATHER", false));

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}

void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   /* Dump a final BMP in case the application doesn't call SwapBuffers */
   if (INTEL_DEBUG & DEBUG_AUB) {
      intel_batchbuffer_flush(brw);
      aub_dump_bmp(&brw->ctx);
   }

   _mesa_meta_free(&brw->ctx);
   brw_meta_fast_clear_free(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   drm_intel_bo_unreference(brw->curbe.curbe_bo);
   if (brw->vs.base.scratch_bo)
      drm_intel_bo_unreference(brw->vs.base.scratch_bo);
   if (brw->gs.base.scratch_bo)
      drm_intel_bo_unreference(brw->gs.base.scratch_bo);
   if (brw->wm.base.scratch_bo)
      drm_intel_bo_unreference(brw->wm.base.scratch_bo);

   gen7_reset_hw_bt_pool_offsets(brw);
   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
   brw->hw_bt_pool.bo = NULL;

   drm_intel_gem_context_destroy(brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   brw_fini_pipe_control(brw);
   intel_batchbuffer_free(brw);

   drm_intel_bo_unreference(brw->throttle_batch[1]);
   drm_intel_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}

GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset the current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
 * on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used.  Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created.  So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      if (fb->Attachment[i].Renderbuffer &&
          fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
         fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
      }
   }
}

GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
      }

      if (driReadPriv == NULL) {
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format.  We must
       * change the format before the renderbuffer's miptree gets allocated;
       * otherwise the formats of the renderbuffer and its miptree will
       * differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport.
       */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled.  However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
      BUFFER_BACK_LEFT,
      BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      if (rb->mt->num_samples <= 1)
         intel_miptree_resolve_color(brw, rb->mt, 0);
      else
         intel_renderbuffer_downsample(brw, rb);
   }
}

static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate.
    */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
      switch (buffers[i].attachment) {
      case __DRI_BUFFER_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 front buffer";
         break;

      case __DRI_BUFFER_FAKE_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 fake front buffer";
         break;

      case __DRI_BUFFER_BACK_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
         region_name = "dri2 back buffer";
         break;

      case __DRI_BUFFER_DEPTH:
      case __DRI_BUFFER_HIZ:
      case __DRI_BUFFER_DEPTH_STENCIL:
      case __DRI_BUFFER_STENCIL:
      case __DRI_BUFFER_ACCUM:
      default:
         fprintf(stderr,
                 "unhandled buffer attach event, attachment type %d\n",
                 buffers[i].attachment);
         return;
      }

      intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }
}

void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate.
    */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current read/drawbuffer
 * state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer.  Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
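
   /* attachments[] is filled below with (attachment, bits-per-pixel) pairs,
    * which is why the count passed to getBuffersWithFormat() is i / 2.
    */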
   if ((_mesa_is_front_buffer_drawing(fb) ||
        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer.  So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer.  If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
                                                        &drawable->w,
                                                        &drawable->h,
                                                        attachments, i / 2,
                                                        buffer_count,
                                                        drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to drm_intel_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   drm_intel_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the
    * first use of a mapping of the buffer involves a bunch of page faulting
    * which is moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer().  If a bo already has
       * a name, then drm_intel_bo_flink() is a low-cost getter.  It does
       * not create a new name.
       */
      drm_intel_bo_flink(last_mt->bo, &old_name);
   }

   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
                                            drawable->w, drawable->h,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   drm_intel_bo_unreference(bo);
}

/**
 * \brief Query the DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer.  Then request the buffers from
 * the image loader.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 */

static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo)
      return;

   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                            buffer->width, buffer->height,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   unsigned int format;
   uint32_t buffer_mask = 0;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
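
   /* buffer_mask tells the loader which buffers we want; the loader reports
    * the ones it actually supplied in images.image_mask below.
    */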

   (*screen->image.loader->getBuffers) (drawable,
                                        driGLFormatToImageFormat(format),
                                        &drawable->dri2.stamp,
                                        drawable->loaderPrivate,
                                        buffer_mask,
                                        &images);

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }
   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}