brw_context.c revision 7b9def35835232a10010f256b9c108219f97f752
1/* 2 Copyright 2003 VMware, Inc. 3 Copyright (C) Intel Corp. 2006. All Rights Reserved. 4 Intel funded Tungsten Graphics to 5 develop this 3D driver. 6 7 Permission is hereby granted, free of charge, to any person obtaining 8 a copy of this software and associated documentation files (the 9 "Software"), to deal in the Software without restriction, including 10 without limitation the rights to use, copy, modify, merge, publish, 11 distribute, sublicense, and/or sell copies of the Software, and to 12 permit persons to whom the Software is furnished to do so, subject to 13 the following conditions: 14 15 The above copyright notice and this permission notice (including the 16 next paragraph) shall be included in all copies or substantial 17 portions of the Software. 18 19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 23 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
26 27 **********************************************************************/ 28 /* 29 * Authors: 30 * Keith Whitwell <keithw@vmware.com> 31 */ 32 33 34#include "main/api_exec.h" 35#include "main/context.h" 36#include "main/fbobject.h" 37#include "main/extensions.h" 38#include "main/imports.h" 39#include "main/macros.h" 40#include "main/points.h" 41#include "main/version.h" 42#include "main/vtxfmt.h" 43#include "main/texobj.h" 44#include "main/framebuffer.h" 45 46#include "vbo/vbo_context.h" 47 48#include "drivers/common/driverfuncs.h" 49#include "drivers/common/meta.h" 50#include "utils.h" 51 52#include "brw_context.h" 53#include "brw_defines.h" 54#include "brw_compiler.h" 55#include "brw_draw.h" 56#include "brw_state.h" 57 58#include "intel_batchbuffer.h" 59#include "intel_buffer_objects.h" 60#include "intel_buffers.h" 61#include "intel_fbo.h" 62#include "intel_mipmap_tree.h" 63#include "intel_pixel.h" 64#include "intel_image.h" 65#include "intel_tex.h" 66#include "intel_tex_obj.h" 67 68#include "swrast_setup/swrast_setup.h" 69#include "tnl/tnl.h" 70#include "tnl/t_pipeline.h" 71#include "util/ralloc.h" 72#include "util/debug.h" 73#include "isl/isl.h" 74 75/*************************************** 76 * Mesa's Driver Functions 77 ***************************************/ 78 79const char *const brw_vendor_string = "Intel Open Source Technology Center"; 80 81static const char * 82get_bsw_model(const struct intel_screen *intelScreen) 83{ 84 switch (intelScreen->eu_total) { 85 case 16: 86 return "405"; 87 case 12: 88 return "400"; 89 default: 90 return " "; 91 } 92} 93 94const char * 95brw_get_renderer_string(const struct intel_screen *intelScreen) 96{ 97 const char *chipset; 98 static char buffer[128]; 99 char *bsw = NULL; 100 101 switch (intelScreen->deviceID) { 102#undef CHIPSET 103#define CHIPSET(id, symbol, str) case id: chipset = str; break; 104#include "pci_ids/i965_pci_ids.h" 105 default: 106 chipset = "Unknown Intel Chipset"; 107 break; 108 } 109 110 /* 
Braswell branding is funny, so we have to fix it up here */ 111 if (intelScreen->deviceID == 0x22B1) { 112 bsw = strdup(chipset); 113 char *needle = strstr(bsw, "XXX"); 114 if (needle) { 115 memcpy(needle, get_bsw_model(intelScreen), 3); 116 chipset = bsw; 117 } 118 } 119 120 (void) driGetRendererString(buffer, chipset, 0); 121 free(bsw); 122 return buffer; 123} 124 125static const GLubyte * 126intel_get_string(struct gl_context * ctx, GLenum name) 127{ 128 const struct brw_context *const brw = brw_context(ctx); 129 130 switch (name) { 131 case GL_VENDOR: 132 return (GLubyte *) brw_vendor_string; 133 134 case GL_RENDERER: 135 return 136 (GLubyte *) brw_get_renderer_string(brw->intelScreen); 137 138 default: 139 return NULL; 140 } 141} 142 143static void 144intel_viewport(struct gl_context *ctx) 145{ 146 struct brw_context *brw = brw_context(ctx); 147 __DRIcontext *driContext = brw->driContext; 148 149 if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) { 150 if (driContext->driDrawablePriv) 151 dri2InvalidateDrawable(driContext->driDrawablePriv); 152 if (driContext->driReadablePriv) 153 dri2InvalidateDrawable(driContext->driReadablePriv); 154 } 155} 156 157static void 158intel_update_framebuffer(struct gl_context *ctx, 159 struct gl_framebuffer *fb) 160{ 161 struct brw_context *brw = brw_context(ctx); 162 163 /* Quantize the derived default number of samples 164 */ 165 fb->DefaultGeometry._NumSamples = 166 intel_quantize_num_samples(brw->intelScreen, 167 fb->DefaultGeometry.NumSamples); 168} 169 170/* On Gen9 color buffers may be compressed by the hardware (lossless 171 * compression). There are, however, format restrictions and care needs to be 172 * taken that the sampler engine is capable for re-interpreting a buffer with 173 * format different the buffer was originally written with. 174 * 175 * For example, SRGB formats are not compressible and the sampler engine isn't 176 * capable of treating RGBA_UNORM as SRGB_ALPHA. 
In such a case the underlying 177 * color buffer needs to be resolved so that the sampling surface can be 178 * sampled as non-compressed (i.e., without the auxiliary MCS buffer being 179 * set). 180 */ 181static bool 182intel_texture_view_requires_resolve(struct brw_context *brw, 183 struct intel_texture_object *intel_tex) 184{ 185 if (brw->gen < 9 || 186 !intel_miptree_is_lossless_compressed(brw, intel_tex->mt)) 187 return false; 188 189 const uint32_t brw_format = brw_format_for_mesa_format(intel_tex->_Format); 190 191 if (isl_format_supports_lossless_compression(brw->intelScreen->devinfo, 192 brw_format)) 193 return false; 194 195 perf_debug("Incompatible sampling format (%s) for rbc (%s)\n", 196 _mesa_get_format_name(intel_tex->_Format), 197 _mesa_get_format_name(intel_tex->mt->format)); 198 199 return true; 200} 201 202static void 203intel_update_state(struct gl_context * ctx, GLuint new_state) 204{ 205 struct brw_context *brw = brw_context(ctx); 206 struct intel_texture_object *tex_obj; 207 struct intel_renderbuffer *depth_irb; 208 209 if (ctx->swrast_context) 210 _swrast_InvalidateState(ctx, new_state); 211 _vbo_InvalidateState(ctx, new_state); 212 213 brw->NewGLState |= new_state; 214 215 _mesa_unlock_context_textures(ctx); 216 217 /* Resolve the depth buffer's HiZ buffer. */ 218 depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); 219 if (depth_irb) 220 intel_renderbuffer_resolve_hiz(brw, depth_irb); 221 222 /* Resolve depth buffer and render cache of each enabled texture. 
*/ 223 int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit; 224 for (int i = 0; i <= maxEnabledUnit; i++) { 225 if (!ctx->Texture.Unit[i]._Current) 226 continue; 227 tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); 228 if (!tex_obj || !tex_obj->mt) 229 continue; 230 intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt); 231 /* Sampling engine understands lossless compression and resolving 232 * those surfaces should be skipped for performance reasons. 233 */ 234 const int flags = intel_texture_view_requires_resolve(brw, tex_obj) ? 235 0 : INTEL_MIPTREE_IGNORE_CCS_E; 236 intel_miptree_resolve_color(brw, tex_obj->mt, flags); 237 brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); 238 } 239 240 /* Resolve color for each active shader image. */ 241 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { 242 const struct gl_shader *shader = ctx->_Shader->CurrentProgram[i] ? 243 ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL; 244 245 if (unlikely(shader && shader->NumImages)) { 246 for (unsigned j = 0; j < shader->NumImages; j++) { 247 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]]; 248 tex_obj = intel_texture_object(u->TexObj); 249 250 if (tex_obj && tex_obj->mt) { 251 /* Access to images is implemented using indirect messages 252 * against data port. Normal render target write understands 253 * lossless compression but unfortunately the typed/untyped 254 * read/write interface doesn't. Therefore the compressed 255 * surfaces need to be resolved prior to accessing them. 256 */ 257 intel_miptree_resolve_color(brw, tex_obj->mt, 0); 258 brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); 259 } 260 } 261 } 262 } 263 264 /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of the 265 * single-sampled color renderbuffers because the CCS buffer isn't 266 * supported for SRGB formats. 
This only matters if FRAMEBUFFER_SRGB is 267 * enabled because otherwise the surface state will be programmed with the 268 * linear equivalent format anyway. 269 */ 270 if (brw->gen >= 9 && ctx->Color.sRGBEnabled) { 271 struct gl_framebuffer *fb = ctx->DrawBuffer; 272 for (int i = 0; i < fb->_NumColorDrawBuffers; i++) { 273 struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i]; 274 275 if (rb == NULL) 276 continue; 277 278 struct intel_renderbuffer *irb = intel_renderbuffer(rb); 279 struct intel_mipmap_tree *mt = irb->mt; 280 281 if (mt == NULL || 282 mt->num_samples > 1 || 283 _mesa_get_srgb_format_linear(mt->format) == mt->format) 284 continue; 285 286 /* Lossless compression is not supported for SRGB formats, it 287 * should be impossible to get here with such surfaces. 288 */ 289 assert(!intel_miptree_is_lossless_compressed(brw, mt)); 290 intel_miptree_resolve_color(brw, mt, 0); 291 brw_render_cache_set_check_flush(brw, mt->bo); 292 } 293 } 294 295 _mesa_lock_context_textures(ctx); 296 297 if (new_state & _NEW_BUFFERS) { 298 intel_update_framebuffer(ctx, ctx->DrawBuffer); 299 if (ctx->DrawBuffer != ctx->ReadBuffer) 300 intel_update_framebuffer(ctx, ctx->ReadBuffer); 301 } 302} 303 304#define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer) 305 306static void 307intel_flush_front(struct gl_context *ctx) 308{ 309 struct brw_context *brw = brw_context(ctx); 310 __DRIcontext *driContext = brw->driContext; 311 __DRIdrawable *driDrawable = driContext->driDrawablePriv; 312 __DRIscreen *const screen = brw->intelScreen->driScrnPriv; 313 314 if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) { 315 if (flushFront(screen) && driDrawable && 316 driDrawable->loaderPrivate) { 317 318 /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT. 319 * 320 * This potentially resolves both front and back buffer. 
It 321 * is unnecessary to resolve the back, but harms nothing except 322 * performance. And no one cares about front-buffer render 323 * performance. 324 */ 325 intel_resolve_for_dri2_flush(brw, driDrawable); 326 intel_batchbuffer_flush(brw); 327 328 flushFront(screen)(driDrawable, driDrawable->loaderPrivate); 329 330 /* We set the dirty bit in intel_prepare_render() if we're 331 * front buffer rendering once we get there. 332 */ 333 brw->front_buffer_dirty = false; 334 } 335 } 336} 337 338static void 339intel_glFlush(struct gl_context *ctx) 340{ 341 struct brw_context *brw = brw_context(ctx); 342 343 intel_batchbuffer_flush(brw); 344 intel_flush_front(ctx); 345 346 brw->need_flush_throttle = true; 347} 348 349static void 350intel_finish(struct gl_context * ctx) 351{ 352 struct brw_context *brw = brw_context(ctx); 353 354 intel_glFlush(ctx); 355 356 if (brw->batch.last_bo) 357 drm_intel_bo_wait_rendering(brw->batch.last_bo); 358} 359 360static void 361brw_init_driver_functions(struct brw_context *brw, 362 struct dd_function_table *functions) 363{ 364 _mesa_init_driver_functions(functions); 365 366 /* GLX uses DRI2 invalidate events to handle window resizing. 367 * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib), 368 * which doesn't provide a mechanism for snooping the event queues. 369 * 370 * So EGL still relies on viewport hacks to handle window resizing. 371 * This should go away with DRI3000. 
372 */ 373 if (!brw->driContext->driScreenPriv->dri2.useInvalidate) 374 functions->Viewport = intel_viewport; 375 376 functions->Flush = intel_glFlush; 377 functions->Finish = intel_finish; 378 functions->GetString = intel_get_string; 379 functions->UpdateState = intel_update_state; 380 381 intelInitTextureFuncs(functions); 382 intelInitTextureImageFuncs(functions); 383 intelInitTextureSubImageFuncs(functions); 384 intelInitTextureCopyImageFuncs(functions); 385 intelInitCopyImageFuncs(functions); 386 intelInitClearFuncs(functions); 387 intelInitBufferFuncs(functions); 388 intelInitPixelFuncs(functions); 389 intelInitBufferObjectFuncs(functions); 390 intel_init_syncobj_functions(functions); 391 brw_init_object_purgeable_functions(functions); 392 393 brwInitFragProgFuncs( functions ); 394 brw_init_common_queryobj_functions(functions); 395 if (brw->gen >= 8 || brw->is_haswell) 396 hsw_init_queryobj_functions(functions); 397 else if (brw->gen >= 6) 398 gen6_init_queryobj_functions(functions); 399 else 400 gen4_init_queryobj_functions(functions); 401 brw_init_compute_functions(functions); 402 if (brw->gen >= 7) 403 brw_init_conditional_render_functions(functions); 404 405 functions->QueryInternalFormat = brw_query_internal_format; 406 407 functions->NewTransformFeedback = brw_new_transform_feedback; 408 functions->DeleteTransformFeedback = brw_delete_transform_feedback; 409 if (brw->intelScreen->has_mi_math_and_lrr) { 410 functions->BeginTransformFeedback = hsw_begin_transform_feedback; 411 functions->EndTransformFeedback = hsw_end_transform_feedback; 412 functions->PauseTransformFeedback = hsw_pause_transform_feedback; 413 functions->ResumeTransformFeedback = hsw_resume_transform_feedback; 414 } else if (brw->gen >= 7) { 415 functions->BeginTransformFeedback = gen7_begin_transform_feedback; 416 functions->EndTransformFeedback = gen7_end_transform_feedback; 417 functions->PauseTransformFeedback = gen7_pause_transform_feedback; 418 functions->ResumeTransformFeedback = 
gen7_resume_transform_feedback; 419 functions->GetTransformFeedbackVertexCount = 420 brw_get_transform_feedback_vertex_count; 421 } else { 422 functions->BeginTransformFeedback = brw_begin_transform_feedback; 423 functions->EndTransformFeedback = brw_end_transform_feedback; 424 } 425 426 if (brw->gen >= 6) 427 functions->GetSamplePosition = gen6_get_sample_position; 428} 429 430static void 431brw_initialize_context_constants(struct brw_context *brw) 432{ 433 struct gl_context *ctx = &brw->ctx; 434 const struct brw_compiler *compiler = brw->intelScreen->compiler; 435 436 const bool stage_exists[MESA_SHADER_STAGES] = { 437 [MESA_SHADER_VERTEX] = true, 438 [MESA_SHADER_TESS_CTRL] = brw->gen >= 7, 439 [MESA_SHADER_TESS_EVAL] = brw->gen >= 7, 440 [MESA_SHADER_GEOMETRY] = brw->gen >= 6, 441 [MESA_SHADER_FRAGMENT] = true, 442 [MESA_SHADER_COMPUTE] = 443 (ctx->API == API_OPENGL_CORE && 444 ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) || 445 (ctx->API == API_OPENGLES2 && 446 ctx->Const.MaxComputeWorkGroupSize[0] >= 128) || 447 _mesa_extension_override_enables.ARB_compute_shader, 448 }; 449 450 unsigned num_stages = 0; 451 for (int i = 0; i < MESA_SHADER_STAGES; i++) { 452 if (stage_exists[i]) 453 num_stages++; 454 } 455 456 unsigned max_samplers = 457 brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16; 458 459 ctx->Const.MaxDualSourceDrawBuffers = 1; 460 ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; 461 ctx->Const.MaxCombinedShaderOutputResources = 462 MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS; 463 464 ctx->Const.QueryCounterBits.Timestamp = 36; 465 466 ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ 467 ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS; 468 ctx->Const.MaxRenderbufferSize = 8192; 469 ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS); 470 ctx->Const.Max3DTextureLevels = 12; /* 2048 */ 471 ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */ 472 ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 
2048 : 512; 473 ctx->Const.MaxTextureMbytes = 1536; 474 ctx->Const.MaxTextureRectSize = 1 << 12; 475 ctx->Const.MaxTextureMaxAnisotropy = 16.0; 476 ctx->Const.StripTextureBorder = true; 477 if (brw->gen >= 7) 478 ctx->Const.MaxProgramTextureGatherComponents = 4; 479 else if (brw->gen == 6) 480 ctx->Const.MaxProgramTextureGatherComponents = 1; 481 482 ctx->Const.MaxUniformBlockSize = 65536; 483 484 for (int i = 0; i < MESA_SHADER_STAGES; i++) { 485 struct gl_program_constants *prog = &ctx->Const.Program[i]; 486 487 if (!stage_exists[i]) 488 continue; 489 490 prog->MaxTextureImageUnits = max_samplers; 491 492 prog->MaxUniformBlocks = BRW_MAX_UBO; 493 prog->MaxCombinedUniformComponents = 494 prog->MaxUniformComponents + 495 ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks; 496 497 prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS; 498 prog->MaxAtomicBuffers = BRW_MAX_ABO; 499 prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0; 500 prog->MaxShaderStorageBlocks = BRW_MAX_SSBO; 501 } 502 503 ctx->Const.MaxTextureUnits = 504 MIN2(ctx->Const.MaxTextureCoordUnits, 505 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits); 506 507 ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO; 508 ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO; 509 ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO; 510 ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO; 511 ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO; 512 ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers; 513 ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES; 514 515 516 /* Hardware only supports a limited number of transform feedback buffers. 517 * So we need to override the Mesa default (which is based only on software 518 * limits). 
519 */ 520 ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS; 521 522 /* On Gen6, in the worst case, we use up one binding table entry per 523 * transform feedback component (see comments above the definition of 524 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value 525 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to 526 * BRW_MAX_SOL_BINDINGS. 527 * 528 * In "separate components" mode, we need to divide this value by 529 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries 530 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS. 531 */ 532 ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS; 533 ctx->Const.MaxTransformFeedbackSeparateComponents = 534 BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS; 535 536 ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = 537 !brw->intelScreen->has_mi_math_and_lrr; 538 539 int max_samples; 540 const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen); 541 const int clamp_max_samples = 542 driQueryOptioni(&brw->optionCache, "clamp_max_samples"); 543 544 if (clamp_max_samples < 0) { 545 max_samples = msaa_modes[0]; 546 } else { 547 /* Select the largest supported MSAA mode that does not exceed 548 * clamp_max_samples. 549 */ 550 max_samples = 0; 551 for (int i = 0; msaa_modes[i] != 0; ++i) { 552 if (msaa_modes[i] <= clamp_max_samples) { 553 max_samples = msaa_modes[i]; 554 break; 555 } 556 } 557 } 558 559 ctx->Const.MaxSamples = max_samples; 560 ctx->Const.MaxColorTextureSamples = max_samples; 561 ctx->Const.MaxDepthTextureSamples = max_samples; 562 ctx->Const.MaxIntegerSamples = max_samples; 563 ctx->Const.MaxImageSamples = 0; 564 565 /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used 566 * to map indices of rectangular grid to sample numbers within a pixel. 567 * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled 568 * extension implementation. 
For more details see the comment above 569 * gen6_set_sample_maps() definition. 570 */ 571 gen6_set_sample_maps(ctx); 572 573 ctx->Const.MinLineWidth = 1.0; 574 ctx->Const.MinLineWidthAA = 1.0; 575 if (brw->gen >= 6) { 576 ctx->Const.MaxLineWidth = 7.375; 577 ctx->Const.MaxLineWidthAA = 7.375; 578 ctx->Const.LineWidthGranularity = 0.125; 579 } else { 580 ctx->Const.MaxLineWidth = 7.0; 581 ctx->Const.MaxLineWidthAA = 7.0; 582 ctx->Const.LineWidthGranularity = 0.5; 583 } 584 585 /* For non-antialiased lines, we have to round the line width to the 586 * nearest whole number. Make sure that we don't advertise a line 587 * width that, when rounded, will be beyond the actual hardware 588 * maximum. 589 */ 590 assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth); 591 592 ctx->Const.MinPointSize = 1.0; 593 ctx->Const.MinPointSizeAA = 1.0; 594 ctx->Const.MaxPointSize = 255.0; 595 ctx->Const.MaxPointSizeAA = 255.0; 596 ctx->Const.PointSizeGranularity = 1.0; 597 598 if (brw->gen >= 5 || brw->is_g4x) 599 ctx->Const.MaxClipPlanes = 8; 600 601 ctx->Const.LowerTessLevel = true; 602 ctx->Const.LowerCsDerivedVariables = true; 603 ctx->Const.PrimitiveRestartForPatches = true; 604 605 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024; 606 ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0; 607 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0; 608 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0; 609 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0; 610 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0; 611 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0; 612 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16; 613 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256; 614 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1; 615 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024; 616 
ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams = 617 MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters, 618 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams); 619 620 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024; 621 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024; 622 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024; 623 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024; 624 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12; 625 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256; 626 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0; 627 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024; 628 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams = 629 MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters, 630 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams); 631 632 /* Fragment shaders use real, 32-bit twos-complement integers for all 633 * integer types. 
634 */ 635 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31; 636 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30; 637 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0; 638 ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt; 639 ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt; 640 641 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31; 642 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30; 643 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0; 644 ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; 645 ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; 646 647 /* Gen6 converts quads to polygon in beginning of 3D pipeline, 648 * but we're not sure how it's actually done for vertex order, 649 * that affect provoking vertex decision. Always use last vertex 650 * convention for quad primitive which works as expected for now. 651 */ 652 if (brw->gen >= 6) 653 ctx->Const.QuadsFollowProvokingVertexConvention = false; 654 655 ctx->Const.NativeIntegers = true; 656 ctx->Const.VertexID_is_zero_based = true; 657 658 /* Regarding the CMP instruction, the Ivybridge PRM says: 659 * 660 * "For each enabled channel 0b or 1b is assigned to the appropriate flag 661 * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord 662 * 0xFFFFFFFF) is assigned to dst." 663 * 664 * but PRMs for earlier generations say 665 * 666 * "In dword format, one GRF may store up to 8 results. When the register 667 * is used later as a vector of Booleans, as only LSB at each channel 668 * contains meaning [sic] data, software should make sure all higher bits 669 * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)." 
670 * 671 * We select the representation of a true boolean uniform to be ~0, and fix 672 * the results of Gen <= 5 CMP instruction's with -(result & 1). 673 */ 674 ctx->Const.UniformBooleanTrue = ~0; 675 676 /* From the gen4 PRM, volume 4 page 127: 677 * 678 * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies 679 * the base address of the first element of the surface, computed in 680 * software by adding the surface base address to the byte offset of 681 * the element in the buffer." 682 * 683 * However, unaligned accesses are slower, so enforce buffer alignment. 684 */ 685 ctx->Const.UniformBufferOffsetAlignment = 16; 686 687 /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so 688 * that we can safely have the CPU and GPU writing the same SSBO on 689 * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never 690 * writes, so there's no problem. For an SSBO, the GPU and the CPU can 691 * be updating disjoint regions of the buffer simultaneously and that will 692 * break if the regions overlap the same cacheline. 
693 */ 694 ctx->Const.ShaderStorageBufferOffsetAlignment = 64; 695 ctx->Const.TextureBufferOffsetAlignment = 16; 696 ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024; 697 698 if (brw->gen >= 6) { 699 ctx->Const.MaxVarying = 32; 700 ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128; 701 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64; 702 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128; 703 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128; 704 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128; 705 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128; 706 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128; 707 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128; 708 } 709 710 /* We want the GLSL compiler to emit code that uses condition codes */ 711 for (int i = 0; i < MESA_SHADER_STAGES; i++) { 712 ctx->Const.ShaderCompilerOptions[i] = 713 brw->intelScreen->compiler->glsl_compiler_options[i]; 714 } 715 716 if (brw->gen >= 7) { 717 ctx->Const.MaxViewportWidth = 32768; 718 ctx->Const.MaxViewportHeight = 32768; 719 } 720 721 /* ARB_viewport_array */ 722 if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) { 723 ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS; 724 ctx->Const.ViewportSubpixelBits = 0; 725 726 /* Cast to float before negating because MaxViewportWidth is unsigned. 
727 */ 728 ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth; 729 ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth; 730 } 731 732 /* ARB_gpu_shader5 */ 733 if (brw->gen >= 7) 734 ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS); 735 736 /* ARB_framebuffer_no_attachments */ 737 ctx->Const.MaxFramebufferWidth = 16384; 738 ctx->Const.MaxFramebufferHeight = 16384; 739 ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers; 740 ctx->Const.MaxFramebufferSamples = max_samples; 741} 742 743static void 744brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_threads) 745{ 746 struct gl_context *ctx = &brw->ctx; 747 /* Maximum number of scalar compute shader invocations that can be run in 748 * parallel in the same subslice assuming SIMD32 dispatch. 749 */ 750 const uint32_t max_invocations = 32 * max_threads; 751 ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations; 752 ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations; 753 ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations; 754 ctx->Const.MaxComputeWorkGroupInvocations = max_invocations; 755 ctx->Const.MaxComputeSharedMemorySize = 64 * 1024; 756} 757 758/** 759 * Process driconf (drirc) options, setting appropriate context flags. 760 * 761 * intelInitExtensions still pokes at optionCache directly, in order to 762 * avoid advertising various extensions. No flags are set, so it makes 763 * sense to continue doing that there. 
764 */ 765static void 766brw_process_driconf_options(struct brw_context *brw) 767{ 768 struct gl_context *ctx = &brw->ctx; 769 770 driOptionCache *options = &brw->optionCache; 771 driParseConfigFiles(options, &brw->intelScreen->optionCache, 772 brw->driContext->driScreenPriv->myNum, "i965"); 773 774 int bo_reuse_mode = driQueryOptioni(options, "bo_reuse"); 775 switch (bo_reuse_mode) { 776 case DRI_CONF_BO_REUSE_DISABLED: 777 break; 778 case DRI_CONF_BO_REUSE_ALL: 779 intel_bufmgr_gem_enable_reuse(brw->bufmgr); 780 break; 781 } 782 783 if (!driQueryOptionb(options, "hiz")) { 784 brw->has_hiz = false; 785 /* On gen6, you can only do separate stencil with HIZ. */ 786 if (brw->gen == 6) 787 brw->has_separate_stencil = false; 788 } 789 790 if (driQueryOptionb(options, "always_flush_batch")) { 791 fprintf(stderr, "flushing batchbuffer before/after each draw call\n"); 792 brw->always_flush_batch = true; 793 } 794 795 if (driQueryOptionb(options, "always_flush_cache")) { 796 fprintf(stderr, "flushing GPU caches before/after each draw call\n"); 797 brw->always_flush_cache = true; 798 } 799 800 if (driQueryOptionb(options, "disable_throttling")) { 801 fprintf(stderr, "disabling flush throttling\n"); 802 brw->disable_throttling = true; 803 } 804 805 brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile"); 806 807 ctx->Const.ForceGLSLExtensionsWarn = 808 driQueryOptionb(options, "force_glsl_extensions_warn"); 809 810 ctx->Const.DisableGLSLLineContinuations = 811 driQueryOptionb(options, "disable_glsl_line_continuations"); 812 813 ctx->Const.AllowGLSLExtensionDirectiveMidShader = 814 driQueryOptionb(options, "allow_glsl_extension_directive_midshader"); 815 816 brw->dual_color_blend_by_location = 817 driQueryOptionb(options, "dual_color_blend_by_location"); 818} 819 820GLboolean 821brwCreateContext(gl_api api, 822 const struct gl_config *mesaVis, 823 __DRIcontext *driContextPriv, 824 unsigned major_version, 825 unsigned minor_version, 826 uint32_t flags, 827 
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
                          | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __func__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   /* Copy device capability bits out of devinfo for quick access. */
   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->is_broxton = devinfo->is_broxton;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
   brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;

   /* Pick the per-generation vtable for surface state and depth/stencil/HiZ
    * emission.
    */
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->gen >= 6) {
      gen6_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);

   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   if (brw_init_pipe_control(brw, devinfo)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   /* Per-stage hardware thread limits, straight from devinfo. */
   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_hs_threads = devinfo->max_hs_threads;
   brw->max_ds_threads = devinfo->max_ds_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   /* FINISHME: Do this for all platforms that the kernel supports */
   if (brw->is_cherryview &&
       screen->subslice_total > 0 && screen->eu_total > 0) {
      /* Logical CS threads = EUs per subslice * 7 threads per EU */
      brw->max_cs_threads = screen->eu_total / screen->subslice_total * 7;

      /* Fuse configurations may give more threads than expected, never less. */
      if (brw->max_cs_threads < devinfo->max_cs_threads)
         brw->max_cs_threads = devinfo->max_cs_threads;
   } else {
      brw->max_cs_threads = devinfo->max_cs_threads;
   }
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
   brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;

   if (brw->gen == 6)
      brw->urb.gs_present = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->sf.viewport_transform_enable = true;

   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;

   /* Resource streamer requires hardware support plus an explicit opt-in via
    * environment variable.
    */
   brw->use_resource_streamer = screen->has_resource_streamer &&
      (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
       env_var_as_boolean("INTEL_USE_GATHER", false));

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}

/**
 * Tear down the driver context: report/free debug state, release all BOs and
 * subsystem state, destroy the hardware context, then free the Mesa context
 * and the brw allocation itself.
 */
void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   /* Dump a final BMP in case the application doesn't call SwapBuffers */
   if (INTEL_DEBUG & DEBUG_AUB) {
      intel_batchbuffer_flush(brw);
      aub_dump_bmp(&brw->ctx);
   }

   _mesa_meta_free(&brw->ctx);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   drm_intel_bo_unreference(brw->curbe.curbe_bo);
   if (brw->vs.base.scratch_bo)
      drm_intel_bo_unreference(brw->vs.base.scratch_bo);
   if (brw->gs.base.scratch_bo)
      drm_intel_bo_unreference(brw->gs.base.scratch_bo);
   if (brw->wm.base.scratch_bo)
      drm_intel_bo_unreference(brw->wm.base.scratch_bo);

   gen7_reset_hw_bt_pool_offsets(brw);
   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
   brw->hw_bt_pool.bo = NULL;

   drm_intel_gem_context_destroy(brw->hw_ctx);

   /* swrast/tnl were only created when a swrast context exists; mirror the
    * creation order in brwCreateContext when tearing down.
    */
   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   brw_fini_pipe_control(brw);
   intel_batchbuffer_free(brw);

   drm_intel_bo_unreference(brw->throttle_batch[1]);
   drm_intel_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}

GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
 * on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it.
 * You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used.  Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created.  So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   /* Desktop GL keeps the sRGB-capable default; nothing to undo. */
   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
      if (rb)
         rb->Format = _mesa_get_srgb_format_linear(rb->Format);
   }
}

/**
 * Make the given context current, binding the draw and read drawables
 * (either may be NULL, in which case the incomplete framebuffer is bound).
 * Passing a NULL context unbinds the current one.
 */
GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         /* Back-date the stamp so the next intel_prepare_render refreshes. */
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
      }

      if (driReadPriv == NULL) {
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format.  We must change
       * the format before the renderbuffer's miptree get's allocated, otherwise
       * the formats of the renderbuffer and its miptree will differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport.
       */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

/**
 * Resolve/downsample the drawable's color buffers before the DRI2 flush
 * hands them to the server.
 */
void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled. However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
      BUFFER_BACK_LEFT,
      BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      /* Single-sampled buffers get a color resolve; multisampled ones are
       * downsampled into the singlesample miptree.
       */
      if (rb->mt->num_samples <= 1)
         intel_miptree_resolve_color(brw, rb->mt, 0);
      else
         intel_renderbuffer_downsample(brw, rb);
   }
}

/* Bits per pixel of the renderbuffer, derived from its mesa format. */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

/**
 * Refresh the drawable's renderbuffers from the classic DRI2 loader
 * (the non-image-loader path of intel_update_renderbuffers()).
 */
static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers
 = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
      /* Map each DRI2 attachment to the renderbuffer it backs. */
      switch (buffers[i].attachment) {
      case __DRI_BUFFER_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 front buffer";
         break;

      case __DRI_BUFFER_FAKE_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 fake front buffer";
         break;

      case __DRI_BUFFER_BACK_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
         region_name = "dri2 back buffer";
         break;

      case __DRI_BUFFER_DEPTH:
      case __DRI_BUFFER_HIZ:
      case __DRI_BUFFER_DEPTH_STENCIL:
      case __DRI_BUFFER_STENCIL:
      case __DRI_BUFFER_ACCUM:
      default:
         fprintf(stderr,
                 "unhandled buffer attach event, attachment type %d\n",
                 buffers[i].attachment);
         return;
      }

      intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }

}

/**
 * Refresh the drawable's renderbuffers via whichever loader interface the
 * screen provides (DRI image loader or classic DRI2), then update the
 * framebuffer size to match the drawable.
 */
void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate.
    */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current read/drawbuffer
 * state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   /* Refresh the draw drawable's buffers if its stamp changed since we last
    * looked.
    */
   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   /* Same for the read drawable. */
   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   /* attachments[] holds (attachment, bpp) pairs, hence i / 2 below. */
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
   if ((_mesa_is_front_buffer_drawing(fb) ||
        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer.  So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer.  If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
                                                        &drawable->w,
                                                        &drawable->h,
                                                        attachments, i / 2,
                                                        buffer_count,
                                                        drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to drm_intel_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   drm_intel_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the first
    * use of a mapping of the buffer involves a bunch of page faulting which is
    * moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer().  If a bo already has a
       * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
       * create a new name.
       */
      drm_intel_bo_flink(last_mt->bo, &old_name);
   }

   /* Still attached to the same BO name: nothing to do. */
   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
                                            drawable->w, drawable->h,
                                            buffer->pitch);

   /* A multisampled front buffer must be re-upsampled from the singlesample
    * miptree so front-buffer rendering stays coherent.
    */
   if (_mesa_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   /* The miptree holds its own reference now. */
   drm_intel_bo_unreference(bo);
}

/**
 * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers from
 * the image loader
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 */

static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo)
      return;

   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                            buffer->width, buffer->height,
                                            buffer->pitch);

   /* Keep a multisampled front buffer coherent after reattach (see the DRI2
    * path in intel_process_dri2_buffer()).
    */
   if (_mesa_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}

/**
 * Refresh the drawable's renderbuffers via the DRI image loader
 * (the image-loader path of intel_update_renderbuffers()).
 */
static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   unsigned int format;
   uint32_t buffer_mask = 0;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   /* Pick the request format from whichever color buffer exists; bail if
    * the framebuffer has neither.
    */
   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   (*screen->image.loader->getBuffers) (drawable,
                                        driGLFormatToImageFormat(format),
                                        &drawable->dri2.stamp,
                                        drawable->loaderPrivate,
                                        buffer_mask,
                                        &images);

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }
   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}