/* brw_context.c revision 848c0e72f36d0e1e460193a2d30b2f631529156f */
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "main/api_exec.h" 34#include "main/imports.h" 35#include "main/macros.h" 36#include "main/points.h" 37#include "main/simple_list.h" 38#include "main/version.h" 39#include "main/vtxfmt.h" 40 41#include "vbo/vbo_context.h" 42 43#include "brw_context.h" 44#include "brw_defines.h" 45#include "brw_draw.h" 46#include "brw_state.h" 47 48#include "intel_fbo.h" 49#include "intel_mipmap_tree.h" 50#include "intel_regions.h" 51#include "intel_tex.h" 52#include "intel_tex_obj.h" 53 54#include "tnl/t_pipeline.h" 55#include "glsl/ralloc.h" 56 57/*************************************** 58 * Mesa's Driver Functions 59 ***************************************/ 60 61static size_t 62brw_query_samples_for_format(struct gl_context *ctx, GLenum target, 63 GLenum internalFormat, int samples[16]) 64{ 65 struct brw_context *brw = brw_context(ctx); 66 67 (void) target; 68 69 switch (brw->gen) { 70 case 7: 71 samples[0] = 8; 72 samples[1] = 4; 73 return 2; 74 75 case 6: 76 samples[0] = 4; 77 return 1; 78 79 default: 80 samples[0] = 1; 81 return 1; 82 } 83} 84 85static void brwInitDriverFunctions(struct intel_screen *screen, 86 struct dd_function_table *functions) 87{ 88 intelInitDriverFunctions( functions ); 89 90 brwInitFragProgFuncs( functions ); 91 brw_init_common_queryobj_functions(functions); 92 if (screen->gen >= 6) 93 gen6_init_queryobj_functions(functions); 94 else 95 gen4_init_queryobj_functions(functions); 96 97 functions->QuerySamplesForFormat = brw_query_samples_for_format; 98 99 if (screen->gen >= 7) { 100 functions->BeginTransformFeedback = gen7_begin_transform_feedback; 101 functions->EndTransformFeedback = gen7_end_transform_feedback; 102 } else { 103 functions->BeginTransformFeedback = brw_begin_transform_feedback; 104 functions->EndTransformFeedback = brw_end_transform_feedback; 105 } 106 107 if (screen->gen 
>= 6) 108 functions->GetSamplePosition = gen6_get_sample_position; 109} 110 111static void 112brw_initialize_context_constants(struct brw_context *brw) 113{ 114 struct gl_context *ctx = &brw->ctx; 115 116 ctx->Const.QueryCounterBits.Timestamp = 36; 117 118 ctx->Const.StripTextureBorder = true; 119 120 ctx->Const.MaxDualSourceDrawBuffers = 1; 121 ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; 122 ctx->Const.FragmentProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT; 123 ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ 124 ctx->Const.MaxTextureUnits = 125 MIN2(ctx->Const.MaxTextureCoordUnits, 126 ctx->Const.FragmentProgram.MaxTextureImageUnits); 127 ctx->Const.VertexProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT; 128 ctx->Const.MaxCombinedTextureImageUnits = 129 ctx->Const.VertexProgram.MaxTextureImageUnits + 130 ctx->Const.FragmentProgram.MaxTextureImageUnits; 131 132 ctx->Const.MaxTextureLevels = 14; /* 8192 */ 133 if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS) 134 ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS; 135 ctx->Const.Max3DTextureLevels = 9; 136 ctx->Const.MaxCubeTextureLevels = 12; 137 138 if (brw->gen >= 7) 139 ctx->Const.MaxArrayTextureLayers = 2048; 140 else 141 ctx->Const.MaxArrayTextureLayers = 512; 142 143 ctx->Const.MaxTextureRectSize = 1 << 12; 144 145 ctx->Const.MaxTextureMaxAnisotropy = 16.0; 146 147 ctx->Const.MaxRenderbufferSize = 8192; 148 149 /* Hardware only supports a limited number of transform feedback buffers. 150 * So we need to override the Mesa default (which is based only on software 151 * limits). 152 */ 153 ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS; 154 155 /* On Gen6, in the worst case, we use up one binding table entry per 156 * transform feedback component (see comments above the definition of 157 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value 158 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to 159 * BRW_MAX_SOL_BINDINGS. 
160 * 161 * In "separate components" mode, we need to divide this value by 162 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries 163 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS. 164 */ 165 ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS; 166 ctx->Const.MaxTransformFeedbackSeparateComponents = 167 BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS; 168 169 if (brw->gen == 6) { 170 ctx->Const.MaxSamples = 4; 171 ctx->Const.MaxColorTextureSamples = 4; 172 ctx->Const.MaxDepthTextureSamples = 4; 173 ctx->Const.MaxIntegerSamples = 4; 174 } else if (brw->gen >= 7) { 175 ctx->Const.MaxSamples = 8; 176 ctx->Const.MaxColorTextureSamples = 8; 177 ctx->Const.MaxDepthTextureSamples = 8; 178 ctx->Const.MaxIntegerSamples = 8; 179 } 180 181 ctx->Const.MinLineWidth = 1.0; 182 ctx->Const.MinLineWidthAA = 1.0; 183 ctx->Const.MaxLineWidth = 5.0; 184 ctx->Const.MaxLineWidthAA = 5.0; 185 ctx->Const.LineWidthGranularity = 0.5; 186 187 ctx->Const.MinPointSize = 1.0; 188 ctx->Const.MinPointSizeAA = 1.0; 189 ctx->Const.MaxPointSize = 255.0; 190 ctx->Const.MaxPointSizeAA = 255.0; 191 ctx->Const.PointSizeGranularity = 1.0; 192 193 if (brw->gen >= 5 || brw->is_g4x) 194 ctx->Const.MaxClipPlanes = 8; 195 196 ctx->Const.VertexProgram.MaxNativeInstructions = 16 * 1024; 197 ctx->Const.VertexProgram.MaxAluInstructions = 0; 198 ctx->Const.VertexProgram.MaxTexInstructions = 0; 199 ctx->Const.VertexProgram.MaxTexIndirections = 0; 200 ctx->Const.VertexProgram.MaxNativeAluInstructions = 0; 201 ctx->Const.VertexProgram.MaxNativeTexInstructions = 0; 202 ctx->Const.VertexProgram.MaxNativeTexIndirections = 0; 203 ctx->Const.VertexProgram.MaxNativeAttribs = 16; 204 ctx->Const.VertexProgram.MaxNativeTemps = 256; 205 ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; 206 ctx->Const.VertexProgram.MaxNativeParameters = 1024; 207 ctx->Const.VertexProgram.MaxEnvParams = 208 MIN2(ctx->Const.VertexProgram.MaxNativeParameters, 209 
ctx->Const.VertexProgram.MaxEnvParams); 210 211 ctx->Const.FragmentProgram.MaxNativeInstructions = 1024; 212 ctx->Const.FragmentProgram.MaxNativeAluInstructions = 1024; 213 ctx->Const.FragmentProgram.MaxNativeTexInstructions = 1024; 214 ctx->Const.FragmentProgram.MaxNativeTexIndirections = 1024; 215 ctx->Const.FragmentProgram.MaxNativeAttribs = 12; 216 ctx->Const.FragmentProgram.MaxNativeTemps = 256; 217 ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; 218 ctx->Const.FragmentProgram.MaxNativeParameters = 1024; 219 ctx->Const.FragmentProgram.MaxEnvParams = 220 MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, 221 ctx->Const.FragmentProgram.MaxEnvParams); 222 223 /* Fragment shaders use real, 32-bit twos-complement integers for all 224 * integer types. 225 */ 226 ctx->Const.FragmentProgram.LowInt.RangeMin = 31; 227 ctx->Const.FragmentProgram.LowInt.RangeMax = 30; 228 ctx->Const.FragmentProgram.LowInt.Precision = 0; 229 ctx->Const.FragmentProgram.HighInt = ctx->Const.FragmentProgram.LowInt; 230 ctx->Const.FragmentProgram.MediumInt = ctx->Const.FragmentProgram.LowInt; 231 232 /* Gen6 converts quads to polygon in beginning of 3D pipeline, 233 * but we're not sure how it's actually done for vertex order, 234 * that affect provoking vertex decision. Always use last vertex 235 * convention for quad primitive which works as expected for now. 
236 */ 237 if (brw->gen >= 6) 238 ctx->Const.QuadsFollowProvokingVertexConvention = false; 239 240 ctx->Const.NativeIntegers = true; 241 ctx->Const.UniformBooleanTrue = 1; 242 ctx->Const.UniformBufferOffsetAlignment = 16; 243 244 ctx->Const.ForceGLSLExtensionsWarn = 245 driQueryOptionb(&brw->optionCache, "force_glsl_extensions_warn"); 246 247 ctx->Const.DisableGLSLLineContinuations = 248 driQueryOptionb(&brw->optionCache, "disable_glsl_line_continuations"); 249 250 if (brw->gen >= 6) { 251 ctx->Const.MaxVarying = 32; 252 ctx->Const.VertexProgram.MaxOutputComponents = 128; 253 ctx->Const.GeometryProgram.MaxInputComponents = 128; 254 ctx->Const.GeometryProgram.MaxOutputComponents = 128; 255 ctx->Const.FragmentProgram.MaxInputComponents = 128; 256 } 257 258 /* We want the GLSL compiler to emit code that uses condition codes */ 259 for (int i = 0; i < MESA_SHADER_TYPES; i++) { 260 ctx->ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX; 261 ctx->ShaderCompilerOptions[i].EmitCondCodes = true; 262 ctx->ShaderCompilerOptions[i].EmitNoNoise = true; 263 ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true; 264 ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true; 265 ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput = true; 266 267 ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform = 268 (i == MESA_SHADER_FRAGMENT); 269 ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp = 270 (i == MESA_SHADER_FRAGMENT); 271 ctx->ShaderCompilerOptions[i].LowerClipDistance = true; 272 } 273 274 ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].PreferDP4 = true; 275} 276 277bool 278brwCreateContext(int api, 279 const struct gl_config *mesaVis, 280 __DRIcontext *driContextPriv, 281 unsigned major_version, 282 unsigned minor_version, 283 uint32_t flags, 284 unsigned *error, 285 void *sharedContextPrivate) 286{ 287 __DRIscreen *sPriv = driContextPriv->driScreenPriv; 288 struct intel_screen *screen = sPriv->driverPrivate; 289 struct dd_function_table functions; 290 291 struct 
brw_context *brw = rzalloc(NULL, struct brw_context); 292 if (!brw) { 293 printf("%s: failed to alloc context\n", __FUNCTION__); 294 *error = __DRI_CTX_ERROR_NO_MEMORY; 295 return false; 296 } 297 298 /* brwInitVtbl needs to know the chipset generation so that it can set the 299 * right pointers. 300 */ 301 brw->gen = screen->gen; 302 303 brwInitVtbl( brw ); 304 305 brwInitDriverFunctions(screen, &functions); 306 307 struct gl_context *ctx = &brw->ctx; 308 309 if (!intelInitContext( brw, api, major_version, minor_version, 310 mesaVis, driContextPriv, 311 sharedContextPrivate, &functions, 312 error)) { 313 ralloc_free(brw); 314 return false; 315 } 316 317 brw_initialize_context_constants(brw); 318 319 /* Reinitialize the context point state. It depends on ctx->Const values. */ 320 _mesa_init_point(ctx); 321 322 if (brw->gen >= 6) { 323 /* Create a new hardware context. Using a hardware context means that 324 * our GPU state will be saved/restored on context switch, allowing us 325 * to assume that the GPU is in the same state we left it in. 326 * 327 * This is required for transform feedback buffer offsets, query objects, 328 * and also allows us to reduce how much state we have to emit. 
329 */ 330 brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr); 331 332 if (!brw->hw_ctx) { 333 fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n"); 334 ralloc_free(brw); 335 return false; 336 } 337 } 338 339 brw_init_surface_formats(brw); 340 341 /* Initialize swrast, tnl driver tables: */ 342 TNLcontext *tnl = TNL_CONTEXT(ctx); 343 if (tnl) 344 tnl->Driver.RunPipeline = _tnl_run_pipeline; 345 346 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK; 347 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD; 348 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER; 349 350 if (brw->is_g4x || brw->gen >= 5) { 351 brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS; 352 brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45; 353 brw->has_surface_tile_offset = true; 354 if (brw->gen < 6) 355 brw->has_compr4 = true; 356 brw->has_aa_line_parameters = true; 357 brw->has_pln = true; 358 } else { 359 brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS; 360 brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965; 361 } 362 363 /* WM maximum threads is number of EUs times number of threads per EU. 
*/ 364 assert(brw->gen <= 7); 365 366 if (brw->is_haswell) { 367 if (brw->gt == 1) { 368 brw->max_wm_threads = 102; 369 brw->max_vs_threads = 70; 370 brw->max_gs_threads = 70; 371 brw->urb.size = 128; 372 brw->urb.min_vs_entries = 32; 373 brw->urb.max_vs_entries = 640; 374 brw->urb.max_gs_entries = 256; 375 } else if (brw->gt == 2) { 376 brw->max_wm_threads = 204; 377 brw->max_vs_threads = 280; 378 brw->max_gs_threads = 256; 379 brw->urb.size = 256; 380 brw->urb.min_vs_entries = 64; 381 brw->urb.max_vs_entries = 1664; 382 brw->urb.max_gs_entries = 640; 383 } else if (brw->gt == 3) { 384 brw->max_wm_threads = 408; 385 brw->max_vs_threads = 280; 386 brw->max_gs_threads = 256; 387 brw->urb.size = 512; 388 brw->urb.min_vs_entries = 64; 389 brw->urb.max_vs_entries = 1664; 390 brw->urb.max_gs_entries = 640; 391 } 392 } else if (brw->gen == 7) { 393 if (brw->gt == 1) { 394 brw->max_wm_threads = 48; 395 brw->max_vs_threads = 36; 396 brw->max_gs_threads = 36; 397 brw->urb.size = 128; 398 brw->urb.min_vs_entries = 32; 399 brw->urb.max_vs_entries = 512; 400 brw->urb.max_gs_entries = 192; 401 } else if (brw->gt == 2) { 402 brw->max_wm_threads = 172; 403 brw->max_vs_threads = 128; 404 brw->max_gs_threads = 128; 405 brw->urb.size = 256; 406 brw->urb.min_vs_entries = 32; 407 brw->urb.max_vs_entries = 704; 408 brw->urb.max_gs_entries = 320; 409 } else { 410 assert(!"Unknown gen7 device."); 411 } 412 } else if (brw->gen == 6) { 413 if (brw->gt == 2) { 414 brw->max_wm_threads = 80; 415 brw->max_vs_threads = 60; 416 brw->max_gs_threads = 60; 417 brw->urb.size = 64; /* volume 5c.5 section 5.1 */ 418 brw->urb.min_vs_entries = 24; 419 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */ 420 brw->urb.max_gs_entries = 256; 421 } else { 422 brw->max_wm_threads = 40; 423 brw->max_vs_threads = 24; 424 brw->max_gs_threads = 21; /* conservative; 24 if rendering disabled */ 425 brw->urb.size = 32; /* volume 5c.5 section 5.1 */ 426 brw->urb.min_vs_entries = 24; 427 
brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */ 428 brw->urb.max_gs_entries = 256; 429 } 430 brw->urb.gen6_gs_previously_active = false; 431 } else if (brw->gen == 5) { 432 brw->urb.size = 1024; 433 brw->max_vs_threads = 72; 434 brw->max_gs_threads = 32; 435 brw->max_wm_threads = 12 * 6; 436 } else if (brw->is_g4x) { 437 brw->urb.size = 384; 438 brw->max_vs_threads = 32; 439 brw->max_gs_threads = 2; 440 brw->max_wm_threads = 10 * 5; 441 } else if (brw->gen < 6) { 442 brw->urb.size = 256; 443 brw->max_vs_threads = 16; 444 brw->max_gs_threads = 2; 445 brw->max_wm_threads = 8 * 4; 446 brw->has_negative_rhw_bug = true; 447 } 448 449 if (brw->gen <= 7) { 450 brw->needs_unlit_centroid_workaround = true; 451 } 452 453 brw->prim_restart.in_progress = false; 454 brw->prim_restart.enable_cut_index = false; 455 456 brw_init_state( brw ); 457 458 if (brw->gen < 6) { 459 brw->curbe.last_buf = calloc(1, 4096); 460 brw->curbe.next_buf = calloc(1, 4096); 461 } 462 463 brw->state.dirty.mesa = ~0; 464 brw->state.dirty.brw = ~0; 465 466 /* Make sure that brw->state.dirty.brw has enough bits to hold all possible 467 * dirty flags. 
468 */ 469 STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->state.dirty.brw)); 470 471 brw->emit_state_always = 0; 472 473 brw->batch.need_workaround_flush = true; 474 475 ctx->VertexProgram._MaintainTnlProgram = true; 476 ctx->FragmentProgram._MaintainTexEnvProgram = true; 477 478 brw_draw_init( brw ); 479 480 brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile"); 481 brw->disable_derivative_optimization = 482 driQueryOptionb(&brw->optionCache, "disable_derivative_optimization"); 483 484 ctx->Const.ContextFlags = 0; 485 if ((flags & __DRI_CTX_FLAG_FORWARD_COMPATIBLE) != 0) 486 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT; 487 488 ctx->Debug.DebugOutput = GL_FALSE; 489 if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) { 490 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_DEBUG_BIT; 491 ctx->Debug.DebugOutput = GL_TRUE; 492 493 /* Turn on some extra GL_ARB_debug_output generation. */ 494 brw->perf_debug = true; 495 } 496 497 brw_fs_alloc_reg_sets(brw); 498 brw_vec4_alloc_reg_set(brw); 499 500 if (INTEL_DEBUG & DEBUG_SHADER_TIME) 501 brw_init_shader_time(brw); 502 503 _mesa_compute_version(ctx); 504 505 _mesa_initialize_dispatch_tables(ctx); 506 _mesa_initialize_vbo_vtxfmt(ctx); 507 508 if (ctx->Extensions.AMD_performance_monitor) { 509 brw_init_performance_monitors(brw); 510 } 511 512 return true; 513} 514 515