brw_context.c revision a9e6a56a02155f0da5e5bfa1a4d188f3d6195066
/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keithw@vmware.com>
  */


#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
#include "main/framebuffer.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_compiler.h"
#include "brw_draw.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"
#include "util/debug.h"

/***************************************
 * Mesa's Driver Functions
 ***************************************/

static size_t
brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
                             GLenum internalFormat, int samples[16])
{
   struct brw_context *brw = brw_context(ctx);

   (void) target;

   switch (brw->gen) {
   case 9:
      samples[0] = 16;
      samples[1] = 8;
      samples[2] = 4;
      samples[3] = 2;
      return 4;

   case 8:
      samples[0] = 8;
      samples[1] = 4;
      samples[2] = 2;
      return 3;

   case 7:
      samples[0] = 8;
      samples[1] = 4;
      return 2;

   case 6:
      samples[0] = 4;
      return 1;

   default:
      assert(brw->gen < 6);
      samples[0] = 1;
      return 1;
   }
}

const char *const brw_vendor_string = "Intel Open Source Technology Center";

const char *
brw_get_renderer_string(unsigned deviceID)
{
   const char *chipset;
   static char buffer[128];

   switch (deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   (void) driGetRendererString(buffer, chipset, 0);
   return buffer;
}

static const GLubyte *
intel_get_string(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);

   default:
      return NULL;
   }
}

static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;

   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      dri2InvalidateDrawable(driContext->driDrawablePriv);
      dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}

static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *tex_obj;
   struct intel_renderbuffer *depth_irb;

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);
   _vbo_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;

   _mesa_unlock_context_textures(ctx);

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb)
      intel_renderbuffer_resolve_hiz(brw, depth_irb);

   /* Resolve depth buffer and render cache of each enabled texture. */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;
      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
      intel_miptree_resolve_color(brw, tex_obj->mt);
      brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
   }

   _mesa_lock_context_textures(ctx);
}

#define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)

static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both front and back buffer. It
          * is unnecessary to resolve the back, but harms nothing except
          * performance. And no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}

static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);

   brw->need_flush_throttle = true;
}

static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}

static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);
   if (brw->gen >= 7)
      brw_init_conditional_render_functions(functions);

   functions->QuerySamplesForFormat = brw_query_samples_for_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   functions->GetTransformFeedbackVertexCount =
      brw_get_transform_feedback_vertex_count;
   if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}

static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->intelScreen->compiler;

   const bool stage_exists[MESA_SHADER_STAGES] = {
      [MESA_SHADER_VERTEX] = true,
      [MESA_SHADER_TESS_CTRL] = false,
      [MESA_SHADER_TESS_EVAL] = false,
      [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
      [MESA_SHADER_FRAGMENT] = true,
      [MESA_SHADER_COMPUTE] = _mesa_extension_override_enables.ARB_compute_shader,
   };

   unsigned num_stages = 0;
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (stage_exists[i])
         num_stages++;
   }

   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.MaxCombinedShaderOutputResources =
      MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
   ctx->Const.MaxRenderbufferSize = 8192;
   ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
   ctx->Const.MaxTextureMbytes = 1536;
   ctx->Const.MaxTextureRectSize = 1 << 12;
   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
   ctx->Const.StripTextureBorder = true;
   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MaxUniformBlockSize = 65536;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];

      if (!stage_exists[i])
         continue;

      prog->MaxTextureImageUnits = max_samplers;

      prog->MaxUniformBlocks = BRW_MAX_UBO;
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;

      prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      prog->MaxAtomicBuffers = BRW_MAX_ABO;
      prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
      prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
   }

   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);

   ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
   ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
   ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;


   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;
   ctx->Const.MaxImageSamples = 0;

   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of rectangular grid to sample numbers within a pixel.
    * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation. For more details see the comment above
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number. Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.LowerTessLevel = true;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how it's actually done for vertex order, which
    * affects the provoking vertex decision. Always use the last vertex
    * convention for quad primitives, which works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and fix
    * the results of Gen <= 5 CMP instructions with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;

   /* From the gen4 PRM, volume 4 page 127:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *     the base address of the first element of the surface, computed in
    *     software by adding the surface base address to the byte offset of
    *     the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
    * writes, so there's no problem. For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously and that will
    * break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->intelScreen->compiler->glsl_compiler_options[i];
   }

   /* ARB_viewport_array */
   if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth;
   ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;
}

static void
brw_adjust_cs_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* For ES, we set these constants based on SIMD8.
    *
    * TODO: Once we can always generate SIMD16, we should update this.
    *
    * For GL, we assume we can generate a SIMD16 program, but this currently
    * is not always true. This allows us to run more test cases, and will be
    * required based on desktop GL compute shader requirements.
    */
   const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;

   const uint32_t max_invocations = simd_size * brw->max_cs_threads;
   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
}

/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions.  No flags are set, so it makes
 * sense to continue doing that there.
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->intelScreen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   if (!driQueryOptionb(options, "hiz")) {
      brw->has_hiz = false;
      /* On gen6, you can only do separate stencil with HIZ. */
      if (brw->gen == 6)
         brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");

   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
}

GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __func__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->is_broxton = devinfo->is_broxton;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->gen >= 6) {
      gen6_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);

   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   if (brw_init_pipe_control(brw, devinfo)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_hs_threads = devinfo->max_hs_threads;
   brw->max_ds_threads = devinfo->max_ds_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   brw->max_cs_threads = devinfo->max_cs_threads;
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
   brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   brw_adjust_cs_context_constants(brw);

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;

   if (brw->gen == 6)
      brw->urb.gs_present = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->sf.viewport_transform_enable = true;

   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;

   brw->use_resource_streamer = screen->has_resource_streamer &&
      (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
       env_var_as_boolean("INTEL_USE_GATHER", false));

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}

void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   /* Dump a final BMP in case the application doesn't call SwapBuffers */
   if (INTEL_DEBUG & DEBUG_AUB) {
      intel_batchbuffer_flush(brw);
      aub_dump_bmp(&brw->ctx);
   }

   _mesa_meta_free(&brw->ctx);
   brw_meta_fast_clear_free(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   drm_intel_bo_unreference(brw->curbe.curbe_bo);
   if (brw->vs.base.scratch_bo)
      drm_intel_bo_unreference(brw->vs.base.scratch_bo);
   if (brw->gs.base.scratch_bo)
      drm_intel_bo_unreference(brw->gs.base.scratch_bo);
   if (brw->wm.base.scratch_bo)
      drm_intel_bo_unreference(brw->wm.base.scratch_bo);

   gen7_reset_hw_bt_pool_offsets(brw);
   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
   brw->hw_bt_pool.bo = NULL;

   drm_intel_gem_context_destroy(brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   brw_fini_pipe_control(brw);
   intel_batchbuffer_free(brw);

   drm_intel_bo_unreference(brw->throttle_batch[1]);
   drm_intel_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}

GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
 * on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used.  Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created.  So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      if (fb->Attachment[i].Renderbuffer &&
          fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
         fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
      }
   }
}

GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
      }

      if (driReadPriv == NULL) {
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format. We must change
       * the format before the renderbuffer's miptree gets allocated, otherwise
       * the formats of the renderbuffer and its miptree will differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport. */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled. However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
      BUFFER_BACK_LEFT,
      BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      if (rb->mt->num_samples <= 1)
         intel_miptree_resolve_color(brw, rb->mt);
      else
         intel_renderbuffer_downsample(brw, rb);
   }
}

static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
      switch (buffers[i].attachment) {
      case __DRI_BUFFER_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 front buffer";
         break;

      case __DRI_BUFFER_FAKE_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 fake front buffer";
         break;

      case __DRI_BUFFER_BACK_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
         region_name = "dri2 back buffer";
         break;

      case __DRI_BUFFER_DEPTH:
      case __DRI_BUFFER_HIZ:
      case __DRI_BUFFER_DEPTH_STENCIL:
      case __DRI_BUFFER_STENCIL:
      case __DRI_BUFFER_ACCUM:
      default:
         fprintf(stderr,
                 "unhandled buffer attach event, attachment type %d\n",
                 buffers[i].attachment);
         return;
      }

      intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }

}

void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current read/drawbuffer
 * state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
   if ((_mesa_is_front_buffer_drawing(fb) ||
        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer.  So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer.  If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
                                                        &drawable->w,
                                                        &drawable->h,
                                                        attachments, i / 2,
                                                        buffer_count,
                                                        drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to drm_intel_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   drm_intel_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the first
    * use of a mapping of the buffer involves a bunch of page faulting which is
    * moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer(). If a bo already has a
       * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
       * create a new name.
       */
      drm_intel_bo_flink(last_mt->bo, &old_name);
   }

   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
                                            drawable->w, drawable->h,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   drm_intel_bo_unreference(bo);
}

/**
 * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers from
 * the image loader
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 */

static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo)
      return;

   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                            buffer->width, buffer->height,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   unsigned int format;
   uint32_t buffer_mask = 0;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   (*screen->image.loader->getBuffers) (drawable,
                                        driGLFormatToImageFormat(format),
                                        &drawable->dri2.stamp,
                                        drawable->loaderPrivate,
                                        buffer_mask,
                                        &images);

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }
   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}