brw_context.c revision 9ec246796f95996868d61ffc9b52a2c1811bb66d
/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
#include "main/framebuffer.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_compiler.h"
#include "brw_draw.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"
#include "util/debug.h"

/***************************************
 * Mesa's Driver Functions
 ***************************************/

const char *const brw_vendor_string = "Intel Open Source Technology Center";

const char *
brw_get_renderer_string(unsigned deviceID)
{
   const char *chipset;
   static char buffer[128];

   switch (deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   (void) driGetRendererString(buffer, chipset, 0);
   return buffer;
}

static const GLubyte *
intel_get_string(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);

   default:
      return NULL;
   }
}

static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;

   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (driContext->driDrawablePriv)
         dri2InvalidateDrawable(driContext->driDrawablePriv);
      if (driContext->driReadablePriv)
         dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}

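/* Descriptive note (added): called from intel_update_state() when
 * _NEW_BUFFERS is flagged.  For framebuffers without attachments, the
 * application-requested default sample count has to be quantized to a
 * sample count the hardware can actually provide.
 */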
static void
intel_update_framebuffer(struct gl_context *ctx,
                         struct gl_framebuffer *fb)
{
   struct brw_context *brw = brw_context(ctx);

   /* Quantize the derived default number of samples
    */
   fb->DefaultGeometry._NumSamples =
      intel_quantize_num_samples(brw->intelScreen,
                                 fb->DefaultGeometry.NumSamples);
}

static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *tex_obj;
   struct intel_renderbuffer *depth_irb;

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);
   _vbo_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;

   _mesa_unlock_context_textures(ctx);

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb)
      intel_renderbuffer_resolve_hiz(brw, depth_irb);

   /* Resolve depth buffer and render cache of each enabled texture. */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;
      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
      /* The sampling engine understands lossless compression, so resolving
       * those surfaces should be skipped for performance reasons.
       */
      intel_miptree_resolve_color(brw, tex_obj->mt,
                                  INTEL_MIPTREE_IGNORE_CCS_E);
      brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
   }

   /* Resolve color for each active shader image. */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      const struct gl_shader *shader = ctx->_Shader->CurrentProgram[i] ?
         ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL;

      if (unlikely(shader && shader->NumImages)) {
         for (unsigned j = 0; j < shader->NumImages; j++) {
            struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]];
            tex_obj = intel_texture_object(u->TexObj);

            if (tex_obj && tex_obj->mt) {
               /* Access to images is implemented using indirect messages
                * against the data port.  Normal render-target writes
                * understand lossless compression, but unfortunately the
                * typed/untyped read/write interface doesn't.  Therefore the
                * compressed surfaces need to be resolved prior to accessing
                * them.
                */
               intel_miptree_resolve_color(brw, tex_obj->mt, 0);
               brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
            }
         }
      }
   }

   /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of the
    * single-sampled color renderbuffers because the CCS buffer isn't
    * supported for SRGB formats.  This only matters if FRAMEBUFFER_SRGB is
    * enabled because otherwise the surface state will be programmed with the
    * linear equivalent format anyway.
    */
   if (brw->gen >= 9 && ctx->Color.sRGBEnabled) {
      struct gl_framebuffer *fb = ctx->DrawBuffer;
      for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];

         if (rb == NULL)
            continue;

         struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         struct intel_mipmap_tree *mt = irb->mt;

         if (mt == NULL ||
             mt->num_samples > 1 ||
             _mesa_get_srgb_format_linear(mt->format) == mt->format)
            continue;

         /* Lossless compression is not supported for SRGB formats, so it
          * should be impossible to get here with such surfaces.
          */
         assert(!intel_miptree_is_lossless_compressed(brw, mt));
         intel_miptree_resolve_color(brw, mt, 0);
         brw_render_cache_set_check_flush(brw, mt->bo);
      }
   }

   _mesa_lock_context_textures(ctx);

   if (new_state & _NEW_BUFFERS) {
      intel_update_framebuffer(ctx, ctx->DrawBuffer);
      if (ctx->DrawBuffer != ctx->ReadBuffer)
         intel_update_framebuffer(ctx, ctx->ReadBuffer);
   }
}

#define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)

static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both front and back buffer.  It
          * is unnecessary to resolve the back, but harms nothing except
          * performance.  And no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}

static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);

   brw->need_flush_throttle = true;
}

static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}

static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);
   if (brw->gen >= 7)
      brw_init_conditional_render_functions(functions);

   functions->QueryInternalFormat = brw_query_internal_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   functions->GetTransformFeedbackVertexCount =
      brw_get_transform_feedback_vertex_count;
   if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}

static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->intelScreen->compiler;

   const bool stage_exists[MESA_SHADER_STAGES] = {
      [MESA_SHADER_VERTEX] = true,
      [MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
      [MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
      [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
      [MESA_SHADER_FRAGMENT] = true,
      [MESA_SHADER_COMPUTE] =
         (ctx->API == API_OPENGL_CORE &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
         (ctx->API == API_OPENGLES2 &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
         _mesa_extension_override_enables.ARB_compute_shader,
   };

   unsigned num_stages = 0;
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (stage_exists[i])
         num_stages++;
   }

   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.MaxCombinedShaderOutputResources =
      MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
   ctx->Const.MaxRenderbufferSize = 8192;
   ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
   ctx->Const.MaxTextureMbytes = 1536;
   ctx->Const.MaxTextureRectSize = 1 << 12;
   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
   ctx->Const.StripTextureBorder = true;
   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MaxUniformBlockSize = 65536;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];

      if (!stage_exists[i])
         continue;

      prog->MaxTextureImageUnits = max_samplers;

      prog->MaxUniformBlocks = BRW_MAX_UBO;
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;

      prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      prog->MaxAtomicBuffers = BRW_MAX_ABO;
      prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
      prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
   }

   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);

   ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
   ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
   ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;


   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");
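   /* Descriptive note (added): the mode list from intel_supported_msaa_modes()
    * is zero-terminated, and the selection logic below also relies on it being
    * sorted in decreasing order: the first entry is taken as the hardware
    * maximum, and the first entry that fits under the clamp as the best
    * allowed mode.
    */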
   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;
   ctx->Const.MaxImageSamples = 0;

   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of rectangular grid to sample numbers within a pixel.
    * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation. For more details see the comment above
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number. Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.LowerTessLevel = true;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
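   /* Descriptive note (added): the {RangeMin, RangeMax, Precision} triples
    * below use the glGetShaderPrecisionFormat() encoding: log2 of the
    * magnitude of the most negative (2^31) and most positive (2^31 - 1)
    * representable values of a 32-bit two's-complement integer, with zero
    * fractional bits.  The vertex-stage limits further down use the same
    * encoding.
    */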
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how the conversion orders vertices, which affects
    * the provoking-vertex decision.  Always use the last-vertex convention
    * for quad primitives, which works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and fix
    * the results of Gen <= 5 CMP instructions with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;

   /* From the gen4 PRM, volume 4 page 127:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *     the base address of the first element of the surface, computed in
    *     software by adding the surface base address to the byte offset of
    *     the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cachecoherent systems (our Atom CPUs).  With UBOs, the GPU never
    * writes, so there's no problem.  For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously and that will
    * break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->intelScreen->compiler->glsl_compiler_options[i];
   }

   if (brw->gen >= 7) {
      ctx->Const.MaxViewportWidth = 32768;
      ctx->Const.MaxViewportHeight = 32768;
   }

   /* ARB_viewport_array */
   if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = 16384;
   ctx->Const.MaxFramebufferHeight = 16384;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;
}

static void
brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_threads)
{
   struct gl_context *ctx = &brw->ctx;

   /* For ES, we set these constants based on SIMD8.
    *
    * TODO: Once we can always generate SIMD16, we should update this.
    *
    * For GL, we assume we can generate a SIMD16 program, but this currently
    * is not always true. This allows us to run more test cases, and will be
    * required based on desktop GL compute shader requirements.
    */
   const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;

   const uint32_t max_invocations = simd_size * max_threads;
   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
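   /* Illustrative arithmetic only (added): with the desktop GL assumption of
    * SIMD16, a device whose devinfo reports max_cs_threads == 64 would
    * advertise 16 * 64 = 1024 invocations per work group; the numbers are an
    * example, not a statement about any particular platform.
    */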
   ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
}

/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions.  No flags are set, so it makes
 * sense to continue doing that there.
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->intelScreen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   if (!driQueryOptionb(options, "hiz")) {
      brw->has_hiz = false;
      /* On gen6, you can only do separate stencil with HIZ. */
      if (brw->gen == 6)
         brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");

   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");

   brw->dual_color_blend_by_location =
      driQueryOptionb(options, "dual_color_blend_by_location");
}
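/* Reference note (added): the options handled above come from the standard
 * driconf configuration sources.  A user-side ~/.drirc entry might look
 * roughly like the following (an illustrative sketch only; see the driconf
 * documentation for the authoritative syntax and option values):
 *
 *    <driconf>
 *       <device driver="i965">
 *          <application name="Default">
 *             <option name="always_flush_batch" value="true" />
 *             <option name="clamp_max_samples" value="4" />
 *          </application>
 *       </device>
 *    </driconf>
 */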
GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __func__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->is_broxton = devinfo->is_broxton;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
   brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->gen >= 6) {
      gen6_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);

   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   if (brw_init_pipe_control(brw, devinfo)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_hs_threads = devinfo->max_hs_threads;
   brw->max_ds_threads = devinfo->max_ds_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   brw->max_cs_threads = devinfo->max_cs_threads;
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
   brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
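   /* Arithmetic note (added): with the 256MB aperture estimate above,
    * dividing by 4 limits any single mappable object to 64MB.
    */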
   brw->max_gtt_map_object_size = gtt_size / 4;

   if (brw->gen == 6)
      brw->urb.gs_present = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->sf.viewport_transform_enable = true;

   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;

   brw->use_resource_streamer = screen->has_resource_streamer &&
      (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
       env_var_as_boolean("INTEL_USE_GATHER", false));

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}

void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   /* Dump a final BMP in case the application doesn't call SwapBuffers */
   if (INTEL_DEBUG & DEBUG_AUB) {
      intel_batchbuffer_flush(brw);
      aub_dump_bmp(&brw->ctx);
   }

   _mesa_meta_free(&brw->ctx);
   brw_meta_fast_clear_free(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   drm_intel_bo_unreference(brw->curbe.curbe_bo);
   if (brw->vs.base.scratch_bo)
      drm_intel_bo_unreference(brw->vs.base.scratch_bo);
   if (brw->gs.base.scratch_bo)
      drm_intel_bo_unreference(brw->gs.base.scratch_bo);
   if (brw->wm.base.scratch_bo)
      drm_intel_bo_unreference(brw->wm.base.scratch_bo);

   gen7_reset_hw_bt_pool_offsets(brw);
   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
   brw->hw_bt_pool.bo = NULL;

   drm_intel_gem_context_destroy(brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   brw_fini_pipe_control(brw);
   intel_batchbuffer_free(brw);

   drm_intel_bo_unreference(brw->throttle_batch[1]);
   drm_intel_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}

GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
 * on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used.  Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created.  So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      if (fb->Attachment[i].Renderbuffer &&
          fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
         fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
      }
   }
}

GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
      }

      if (driReadPriv == NULL) {
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format.  We must change
       * the format before the renderbuffer's miptree gets allocated, otherwise
       * the formats of the renderbuffer and its miptree will differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport. */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled. However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
      BUFFER_BACK_LEFT,
      BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      if (rb->mt->num_samples <= 1)
         intel_miptree_resolve_color(brw, rb->mt, 0);
      else
         intel_renderbuffer_downsample(brw, rb);
   }
}

static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
      switch (buffers[i].attachment) {
      case __DRI_BUFFER_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 front buffer";
         break;

      case __DRI_BUFFER_FAKE_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 fake front buffer";
         break;

      case __DRI_BUFFER_BACK_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
         region_name = "dri2 back buffer";
         break;

      case __DRI_BUFFER_DEPTH:
      case __DRI_BUFFER_HIZ:
      case __DRI_BUFFER_DEPTH_STENCIL:
      case __DRI_BUFFER_STENCIL:
      case __DRI_BUFFER_ACCUM:
      default:
         fprintf(stderr,
                 "unhandled buffer attach event, attachment type %d\n",
                 buffers[i].attachment);
         return;
      }

      intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }

}

void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current read/drawbuffer
 * state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
   if ((_mesa_is_front_buffer_drawing(fb) ||
        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer.  So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer.  If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
                                                        &drawable->w,
                                                        &drawable->h,
                                                        attachments, i / 2,
                                                        buffer_count,
                                                        drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to drm_intel_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   drm_intel_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the first
    * use of a mapping of the buffer involves a bunch of page faulting which is
    * moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer().  If a bo already has a
       * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
       * create a new name.
       */
      drm_intel_bo_flink(last_mt->bo, &old_name);
   }

   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
                                            drawable->w, drawable->h,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   drm_intel_bo_unreference(bo);
}

/**
 * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers from
 * the image loader.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 */

static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo)
      return;

   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                            buffer->width, buffer->height,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   unsigned int format;
   uint32_t buffer_mask = 0;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   (*screen->image.loader->getBuffers) (drawable,
                                        driGLFormatToImageFormat(format),
                                        &drawable->dri2.stamp,
                                        drawable->loaderPrivate,
                                        buffer_mask,
                                        &images);

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }
   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}