d3d11_context.h revision 1b15a3cafdc699c63466059d56f36b295475ee9e
1/************************************************************************** 2 * 3 * Copyright 2010 Luca Barbieri 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining 6 * a copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sublicense, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial 15 * portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
24 * 25 **************************************************************************/ 26 27/* used to unbind things, we need 128 due to resources */ 28static const void* zero_data[128]; 29 30#define UPDATE_VIEWS_SHIFT (D3D11_STAGES * 0) 31#define UPDATE_SAMPLERS_SHIFT (D3D11_STAGES * 1) 32#define UPDATE_VERTEX_BUFFERS (1 << (D3D11_STAGES * 2)) 33 34#if API >= 11 35template<typename PtrTraits> 36struct GalliumD3D11DeviceContext : 37 public GalliumD3D11DeviceChild<ID3D11DeviceContext> 38{ 39#else 40template<bool threadsafe> 41struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe> 42{ 43 typedef simple_ptr_traits PtrTraits; 44 typedef GalliumD3D10Device GalliumD3D10DeviceContext; 45#endif 46 47 refcnt_ptr<GalliumD3D11Shader<>, PtrTraits> shaders[D3D11_STAGES]; 48 refcnt_ptr<GalliumD3D11InputLayout, PtrTraits> input_layout; 49 refcnt_ptr<GalliumD3D11Buffer, PtrTraits> index_buffer; 50 refcnt_ptr<GalliumD3D11RasterizerState, PtrTraits> rasterizer_state; 51 refcnt_ptr<GalliumD3D11DepthStencilState, PtrTraits> depth_stencil_state; 52 refcnt_ptr<GalliumD3D11BlendState, PtrTraits> blend_state; 53 refcnt_ptr<GalliumD3D11DepthStencilView, PtrTraits> depth_stencil_view; 54 refcnt_ptr<GalliumD3D11Predicate, PtrTraits> render_predicate; 55 56 refcnt_ptr<GalliumD3D11Buffer, PtrTraits> constant_buffers[D3D11_STAGES][D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT]; 57 refcnt_ptr<GalliumD3D11ShaderResourceView, PtrTraits> shader_resource_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT]; 58 refcnt_ptr<GalliumD3D11SamplerState, PtrTraits> samplers[D3D11_STAGES][D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT]; 59 refcnt_ptr<GalliumD3D11Buffer, PtrTraits> input_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; 60 refcnt_ptr<GalliumD3D11RenderTargetView, PtrTraits> render_target_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT]; 61 refcnt_ptr<GalliumD3D11Buffer, PtrTraits> so_targets[D3D11_SO_BUFFER_SLOT_COUNT]; 62 63#if API >= 11 64 
refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> cs_unordered_access_views[D3D11_PS_CS_UAV_REGISTER_COUNT]; 65 refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> om_unordered_access_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT]; 66#endif 67 68 D3D11_VIEWPORT viewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; 69 D3D11_RECT scissor_rects[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; 70 unsigned so_offsets[D3D11_SO_BUFFER_SLOT_COUNT]; 71 D3D11_PRIMITIVE_TOPOLOGY primitive_topology; 72 DXGI_FORMAT index_format; 73 unsigned index_offset; 74 BOOL render_predicate_value; 75 float blend_color[4]; 76 unsigned sample_mask; 77 unsigned stencil_ref; 78 bool depth_clamp; 79 80 void* default_input_layout; 81 void* default_rasterizer; 82 void* default_depth_stencil; 83 void* default_blend; 84 void* default_sampler; 85 void* ld_sampler; 86 void * default_shaders[D3D11_STAGES]; 87 88 // derived state 89 int primitive_mode; 90 struct pipe_vertex_buffer vertex_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; 91 struct pipe_resource* so_buffers[D3D11_SO_BUFFER_SLOT_COUNT]; 92 struct pipe_sampler_view* sampler_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT]; 93 struct 94 { 95 void* ld; // accessed with a -1 index from v 96 void* v[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT]; 97 } sampler_csos[D3D11_STAGES]; 98 struct pipe_resource * buffers[D3D11_SO_BUFFER_SLOT_COUNT]; 99 unsigned num_shader_resource_views[D3D11_STAGES]; 100 unsigned num_samplers[D3D11_STAGES]; 101 unsigned num_vertex_buffers; 102 unsigned num_render_target_views; 103 unsigned num_viewports; 104 unsigned num_scissor_rects; 105 unsigned num_so_targets; 106 107 struct pipe_context* pipe; 108 unsigned update_flags; 109 110 bool owns_pipe; 111 unsigned context_flags; 112 113 GalliumD3D11Caps caps; 114 115 cso_context* cso_ctx; 116 gen_mipmap_state* gen_mipmap; 117 118#if API >= 11 119#define SYNCHRONIZED do {} while(0) 120 121 GalliumD3D11DeviceContext(GalliumD3D11Screen* 
device, pipe_context* pipe, bool owns_pipe, unsigned context_flags = 0) 122 : GalliumD3D11DeviceChild<ID3D11DeviceContext>(device), pipe(pipe), owns_pipe(owns_pipe), context_flags(context_flags) 123 { 124 caps = device->screen_caps; 125 init_context(); 126 } 127 128 ~GalliumD3D11DeviceContext() 129 { 130 destroy_context(); 131 } 132#else 133#define SYNCHRONIZED lock_t<maybe_mutex_t<threadsafe> > lock_(this->mutex) 134 135 GalliumD3D10Device(pipe_screen* screen, pipe_context* pipe, bool owns_pipe, unsigned creation_flags, IDXGIAdapter* adapter) 136 : GalliumD3D10ScreenImpl<threadsafe>(screen, pipe, owns_pipe, creation_flags, adapter), pipe(pipe), owns_pipe(owns_pipe), context_flags(0) 137 { 138 caps = this->screen_caps; 139 init_context(); 140 } 141 142 ~GalliumD3D10Device() 143 { 144 destroy_context(); 145 } 146#endif 147 148 void init_context() 149 { 150 if(!pipe->begin_query) 151 caps.queries = false; 152 if(!pipe->render_condition) 153 caps.render_condition = false; 154 if(!pipe->bind_gs_state) 155 { 156 caps.gs = false; 157 caps.stages = 2; 158 } 159 if(!pipe->set_stream_output_buffers) 160 caps.so = false; 161 162 update_flags = 0; 163 164 // pipeline state 165 memset(viewports, 0, sizeof(viewports)); 166 memset(scissor_rects, 0, sizeof(scissor_rects)); 167 memset(so_offsets, 0, sizeof(so_offsets)); 168 primitive_topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; 169 index_format = DXGI_FORMAT_UNKNOWN; 170 index_offset = 0; 171 render_predicate_value = 0; 172 memset(blend_color, 0, sizeof(blend_color)); 173 sample_mask = ~0; 174 stencil_ref = 0; 175 depth_clamp = 0; 176 177 // derived state 178 primitive_mode = 0; 179 memset(vertex_buffers, 0, sizeof(vertex_buffers)); 180 memset(so_buffers, 0, sizeof(so_buffers)); 181 memset(sampler_views, 0, sizeof(sampler_views)); 182 memset(sampler_csos, 0, sizeof(sampler_csos)); 183 memset(num_shader_resource_views, 0, sizeof(num_shader_resource_views)); 184 memset(num_samplers, 0, sizeof(num_samplers)); 185 num_vertex_buffers 
= 0; 186 num_render_target_views = 0; 187 num_viewports = 0; 188 num_scissor_rects = 0; 189 num_so_targets = 0; 190 191 default_input_layout = pipe->create_vertex_elements_state(pipe, 0, 0); 192 193 struct pipe_rasterizer_state rasterizerd; 194 memset(&rasterizerd, 0, sizeof(rasterizerd)); 195 rasterizerd.gl_rasterization_rules = 1; 196 rasterizerd.cull_face = PIPE_FACE_BACK; 197 default_rasterizer = pipe->create_rasterizer_state(pipe, &rasterizerd); 198 199 struct pipe_depth_stencil_alpha_state depth_stencild; 200 memset(&depth_stencild, 0, sizeof(depth_stencild)); 201 depth_stencild.depth.enabled = TRUE; 202 depth_stencild.depth.writemask = 1; 203 depth_stencild.depth.func = PIPE_FUNC_LESS; 204 default_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &depth_stencild); 205 206 struct pipe_blend_state blendd; 207 memset(&blendd, 0, sizeof(blendd)); 208 blendd.rt[0].colormask = 0xf; 209 default_blend = pipe->create_blend_state(pipe, &blendd); 210 211 struct pipe_sampler_state samplerd; 212 memset(&samplerd, 0, sizeof(samplerd)); 213 samplerd.normalized_coords = 1; 214 samplerd.min_img_filter = PIPE_TEX_FILTER_LINEAR; 215 samplerd.mag_img_filter = PIPE_TEX_FILTER_LINEAR; 216 samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR; 217 samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; 218 samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; 219 samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; 220 samplerd.border_color[0] = 1.0f; 221 samplerd.border_color[1] = 1.0f; 222 samplerd.border_color[2] = 1.0f; 223 samplerd.border_color[3] = 1.0f; 224 samplerd.min_lod = -FLT_MAX; 225 samplerd.max_lod = FLT_MAX; 226 samplerd.max_anisotropy = 1; 227 default_sampler = pipe->create_sampler_state(pipe, &samplerd); 228 229 memset(&samplerd, 0, sizeof(samplerd)); 230 samplerd.normalized_coords = 0; 231 samplerd.min_img_filter = PIPE_TEX_FILTER_NEAREST; 232 samplerd.mag_img_filter = PIPE_TEX_FILTER_NEAREST; 233 samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 234 samplerd.wrap_r = 
PIPE_TEX_WRAP_CLAMP_TO_BORDER; 235 samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_BORDER; 236 samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_BORDER; 237 samplerd.min_lod = -FLT_MAX; 238 samplerd.max_lod = FLT_MAX; 239 samplerd.max_anisotropy = 1; 240 ld_sampler = pipe->create_sampler_state(pipe, &samplerd); 241 242 for(unsigned s = 0; s < D3D11_STAGES; ++s) 243 { 244 sampler_csos[s].ld = ld_sampler; 245 for(unsigned i = 0; i < D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; ++i) 246 sampler_csos[s].v[i] = default_sampler; 247 } 248 249 // TODO: should this really be empty shaders, or should they be all-passthrough? 250 memset(default_shaders, 0, sizeof(default_shaders)); 251 struct ureg_program *ureg; 252 ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT); 253 ureg_END(ureg); 254 default_shaders[PIPE_SHADER_FRAGMENT] = ureg_create_shader_and_destroy(ureg, pipe); 255 256 ureg = ureg_create(TGSI_PROCESSOR_VERTEX); 257 ureg_END(ureg); 258 default_shaders[PIPE_SHADER_VERTEX] = ureg_create_shader_and_destroy(ureg, pipe); 259 260 cso_ctx = cso_create_context(pipe); 261 gen_mipmap = util_create_gen_mipmap(pipe, cso_ctx); 262 263 RestoreGalliumState(); 264 } 265 266 void destroy_context() 267 { 268 util_destroy_gen_mipmap(gen_mipmap); 269 cso_destroy_context(cso_ctx); 270 271 pipe->bind_vertex_elements_state(pipe, 0); 272 pipe->delete_vertex_elements_state(pipe, default_input_layout); 273 274 pipe->bind_rasterizer_state(pipe, 0); 275 pipe->delete_rasterizer_state(pipe, default_rasterizer); 276 277 pipe->bind_depth_stencil_alpha_state(pipe, 0); 278 pipe->delete_depth_stencil_alpha_state(pipe, default_depth_stencil); 279 280 pipe->bind_blend_state(pipe, 0); 281 pipe->delete_blend_state(pipe, default_blend); 282 283 pipe->bind_fragment_sampler_states(pipe, 0, 0); 284 pipe->bind_vertex_sampler_states(pipe, 0, 0); 285 if(pipe->bind_geometry_sampler_states) 286 pipe->bind_geometry_sampler_states(pipe, 0, 0); 287 pipe->delete_sampler_state(pipe, default_sampler); 288 pipe->delete_sampler_state(pipe, 
ld_sampler); 289 290 pipe->bind_fs_state(pipe, 0); 291 pipe->delete_fs_state(pipe, default_shaders[PIPE_SHADER_FRAGMENT]); 292 293 pipe->bind_vs_state(pipe, 0); 294 pipe->delete_vs_state(pipe, default_shaders[PIPE_SHADER_VERTEX]); 295 296 if(owns_pipe) 297 pipe->destroy(pipe); 298 } 299 300 virtual unsigned STDMETHODCALLTYPE GetContextFlags(void) 301 { 302 return context_flags; 303 } 304#if API >= 11 305#define SET_SHADER_EXTRA_ARGS , \ 306 __in_ecount_opt(NumClassInstances) ID3D11ClassInstance *const *ppClassInstances, \ 307 unsigned NumClassInstances 308#define GET_SHADER_EXTRA_ARGS , \ 309 __out_ecount_opt(*pNumClassInstances) ID3D11ClassInstance **ppClassInstances, \ 310 __inout_opt unsigned *pNumClassInstances 311#else 312#define SET_SHADER_EXTRA_ARGS 313#define GET_SHADER_EXTRA_ARGS 314#endif 315 316/* On Windows D3D11, SetConstantBuffers and SetShaderResources crash if passed a null pointer. 317 * Instead, you have to pass a pointer to nulls to unbind things. 318 * We do the same. 319 * TODO: is D3D10 the same? 320 */ 321 template<unsigned s> 322 void xs_set_shader(GalliumD3D11Shader<>* shader) 323 { 324 if(shader != shaders[s].p) 325 { 326 shaders[s] = shader; 327 void* shader_cso = shader ? 
shader->object : default_shaders[s]; 328 switch(s) 329 { 330 case PIPE_SHADER_VERTEX: 331 pipe->bind_vs_state(pipe, shader_cso); 332 break; 333 case PIPE_SHADER_FRAGMENT: 334 pipe->bind_fs_state(pipe, shader_cso); 335 break; 336 case PIPE_SHADER_GEOMETRY: 337 pipe->bind_gs_state(pipe, shader_cso); 338 break; 339 } 340 update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s)) | (1 << (UPDATE_VIEWS_SHIFT + s)); 341 } 342 } 343 344 template<unsigned s> 345 void xs_set_constant_buffers(unsigned start, unsigned count, GalliumD3D11Buffer *const *constbufs) 346 { 347 for(unsigned i = 0; i < count; ++i) 348 { 349 if(constbufs[i] != constant_buffers[s][i].p) 350 { 351 constant_buffers[s][i] = constbufs[i]; 352 if(s < caps.stages && start + i < caps.constant_buffers[s]) 353 pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL); 354 } 355 } 356 } 357 358 template<unsigned s> 359 void xs_set_shader_resources(unsigned start, unsigned count, GalliumD3D11ShaderResourceView *const *srvs) 360 { 361 int last_different = -1; 362 for(unsigned i = 0; i < count; ++i) 363 { 364 if(shader_resource_views[s][start + i].p != srvs[i]) 365 { 366 shader_resource_views[s][start + i] = srvs[i]; 367 sampler_views[s][start + i] = srvs[i] ? srvs[i]->object : 0; 368 last_different = i; 369 } 370 } 371 if(last_different >= 0) 372 { 373 num_shader_resource_views[s] = std::max(num_shader_resource_views[s], start + last_different + 1); 374 update_flags |= 1 << (UPDATE_VIEWS_SHIFT + s); 375 } 376 } 377 378 template<unsigned s> 379 void xs_set_samplers(unsigned start, unsigned count, GalliumD3D11SamplerState *const *samps) 380 { 381 int last_different = -1; 382 for(unsigned i = 0; i < count; ++i) 383 { 384 if(samplers[s][start + i].p != samps[i]) 385 { 386 samplers[s][start + i] = samps[i]; 387 sampler_csos[s].v[start + i] = samps[i] ? 
samps[i]->object : default_sampler; 388 } 389 if(last_different >= 0) 390 { 391 num_samplers[s] = std::max(num_samplers[s], start + last_different + 1); 392 update_flags |= (UPDATE_SAMPLERS_SHIFT + s); 393 } 394 } 395 } 396 397#define IMPLEMENT_SHADER_STAGE(XS, Stage) \ 398 virtual void STDMETHODCALLTYPE XS##SetShader( \ 399 __in_opt ID3D11##Stage##Shader *pShader \ 400 SET_SHADER_EXTRA_ARGS) \ 401 { \ 402 SYNCHRONIZED; \ 403 xs_set_shader<D3D11_STAGE_##XS>((GalliumD3D11Shader<>*)pShader); \ 404 } \ 405 virtual void STDMETHODCALLTYPE XS##GetShader(\ 406 __out ID3D11##Stage##Shader **ppShader \ 407 GET_SHADER_EXTRA_ARGS) \ 408 { \ 409 SYNCHRONIZED; \ 410 *ppShader = (ID3D11##Stage##Shader*)shaders[D3D11_STAGE_##XS].ref(); \ 411 } \ 412 virtual void STDMETHODCALLTYPE XS##SetConstantBuffers(\ 413 __in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1) unsigned StartSlot, \ 414 __in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - StartSlot) unsigned NumBuffers, \ 415 __in_ecount(NumBuffers) ID3D11Buffer *const *ppConstantBuffers) \ 416 { \ 417 SYNCHRONIZED; \ 418 xs_set_constant_buffers<D3D11_STAGE_##XS>(StartSlot, NumBuffers, (GalliumD3D11Buffer *const *)ppConstantBuffers); \ 419 } \ 420 virtual void STDMETHODCALLTYPE XS##GetConstantBuffers(\ 421 __in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1) unsigned StartSlot, \ 422 __in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - StartSlot) unsigned NumBuffers, \ 423 __out_ecount(NumBuffers) ID3D11Buffer **ppConstantBuffers) \ 424 { \ 425 SYNCHRONIZED; \ 426 for(unsigned i = 0; i < NumBuffers; ++i) \ 427 ppConstantBuffers[i] = constant_buffers[D3D11_STAGE_##XS][StartSlot + i].ref(); \ 428 } \ 429 virtual void STDMETHODCALLTYPE XS##SetShaderResources(\ 430 __in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT - 1) unsigned StartSlot, \ 431 __in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT - StartSlot) unsigned NumViews, \ 432 __in_ecount(NumViews) 
ID3D11ShaderResourceView *const *ppShaderResourceViews) \ 433 { \ 434 SYNCHRONIZED; \ 435 xs_set_shader_resources<D3D11_STAGE_##XS>(StartSlot, NumViews, (GalliumD3D11ShaderResourceView *const *)ppShaderResourceViews); \ 436 } \ 437 virtual void STDMETHODCALLTYPE XS##GetShaderResources(\ 438 __in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT - 1) unsigned StartSlot, \ 439 __in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT - StartSlot) unsigned NumViews, \ 440 __out_ecount(NumViews) ID3D11ShaderResourceView **ppShaderResourceViews) \ 441 { \ 442 SYNCHRONIZED; \ 443 for(unsigned i = 0; i < NumViews; ++i) \ 444 ppShaderResourceViews[i] = shader_resource_views[D3D11_STAGE_##XS][StartSlot + i].ref(); \ 445 } \ 446 virtual void STDMETHODCALLTYPE XS##SetSamplers(\ 447 __in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT - 1) unsigned StartSlot, \ 448 __in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT - StartSlot) unsigned NumSamplers, \ 449 __in_ecount(NumSamplers) ID3D11SamplerState *const *ppSamplers) \ 450 { \ 451 SYNCHRONIZED; \ 452 xs_set_samplers<D3D11_STAGE_##XS>(StartSlot, NumSamplers, (GalliumD3D11SamplerState *const *)ppSamplers); \ 453 } \ 454 virtual void STDMETHODCALLTYPE XS##GetSamplers( \ 455 __in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT - 1) unsigned StartSlot, \ 456 __in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT - StartSlot) unsigned NumSamplers, \ 457 __out_ecount(NumSamplers) ID3D11SamplerState **ppSamplers) \ 458 { \ 459 SYNCHRONIZED; \ 460 for(unsigned i = 0; i < NumSamplers; ++i) \ 461 ppSamplers[i] = samplers[D3D11_STAGE_##XS][StartSlot + i].ref(); \ 462 } 463 464#define DO_VS(x) x 465#define DO_GS(x) do {if(caps.gs) {x;}} while(0) 466#define DO_PS(x) x 467#define DO_HS(x) 468#define DO_DS(x) 469#define DO_CS(x) 470 IMPLEMENT_SHADER_STAGE(VS, Vertex) 471 IMPLEMENT_SHADER_STAGE(GS, Geometry) 472 IMPLEMENT_SHADER_STAGE(PS, Pixel) 473 474#if API >= 11 475 IMPLEMENT_SHADER_STAGE(HS, Hull) 476 IMPLEMENT_SHADER_STAGE(DS, Domain) 
477 IMPLEMENT_SHADER_STAGE(CS, Compute) 478 479 virtual void STDMETHODCALLTYPE CSSetUnorderedAccessViews( 480 __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - 1) unsigned StartSlot, 481 __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - StartSlot) unsigned NumUAVs, 482 __in_ecount(NumUAVs) ID3D11UnorderedAccessView *const *ppUnorderedAccessViews, 483 __in_ecount(NumUAVs) const unsigned *pUAVInitialCounts) 484 { 485 SYNCHRONIZED; 486 for(unsigned i = 0; i < NumUAVs; ++i) 487 cs_unordered_access_views[StartSlot + i] = ppUnorderedAccessViews[i]; 488 } 489 490 virtual void STDMETHODCALLTYPE CSGetUnorderedAccessViews( 491 __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - 1) unsigned StartSlot, 492 __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - StartSlot) unsigned NumUAVs, 493 __out_ecount(NumUAVs) ID3D11UnorderedAccessView **ppUnorderedAccessViews) 494 { 495 SYNCHRONIZED; 496 for(unsigned i = 0; i < NumUAVs; ++i) 497 ppUnorderedAccessViews[i] = cs_unordered_access_views[StartSlot + i].ref(); 498 } 499#endif 500 501 template<unsigned s> 502 void update_stage() 503 { 504 if(update_flags & (1 << (UPDATE_VIEWS_SHIFT + s))) 505 { 506 while(num_shader_resource_views[s] && !sampler_views[s][num_shader_resource_views[s] - 1]) \ 507 --num_shader_resource_views[s]; 508 if(s < caps.stages) 509 { 510 struct pipe_sampler_view* views_to_bind[PIPE_MAX_SAMPLERS]; 511 unsigned num_views_to_bind = shaders[s] ? 
shaders[s]->slot_to_resource.size() : 0; 512 for(unsigned i = 0; i < num_views_to_bind; ++i) 513 { 514 views_to_bind[i] = sampler_views[s][shaders[s]->slot_to_resource[i]]; 515 } 516 switch(s) 517 { 518 case PIPE_SHADER_VERTEX: 519 pipe->set_vertex_sampler_views(pipe, num_views_to_bind, views_to_bind); 520 break; 521 case PIPE_SHADER_FRAGMENT: 522 pipe->set_fragment_sampler_views(pipe, num_views_to_bind, views_to_bind); 523 break; 524 case PIPE_SHADER_GEOMETRY: 525 pipe->set_geometry_sampler_views(pipe, num_views_to_bind, views_to_bind); 526 break; 527 } 528 } 529 } 530 531 if(update_flags & (1 << (UPDATE_SAMPLERS_SHIFT + s))) 532 { 533 while(num_samplers[s] && !sampler_csos[s].v[num_samplers[s] - 1]) 534 --num_samplers[s]; 535 if(s < caps.stages) 536 { 537 void* samplers_to_bind[PIPE_MAX_SAMPLERS]; 538 unsigned num_samplers_to_bind = shaders[s] ? shaders[s]->slot_to_sampler.size() : 0; 539 for(unsigned i = 0; i < num_samplers_to_bind; ++i) 540 { 541 // index can be -1 to access sampler_csos[s].ld 542 samplers_to_bind[i] = *(sampler_csos[s].v + shaders[s]->slot_to_sampler[i]); 543 } 544 switch(s) 545 { 546 case PIPE_SHADER_VERTEX: 547 pipe->bind_vertex_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind); 548 break; 549 case PIPE_SHADER_FRAGMENT: 550 pipe->bind_fragment_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind); 551 break; 552 case PIPE_SHADER_GEOMETRY: 553 pipe->bind_geometry_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind); 554 break; 555 } 556 } 557 } 558 } 559 560 void update_state() 561 { 562 update_stage<D3D11_STAGE_PS>(); 563 update_stage<D3D11_STAGE_VS>(); 564 update_stage<D3D11_STAGE_GS>(); 565#if API >= 11 566 update_stage<D3D11_STAGE_HS>(); 567 update_stage<D3D11_STAGE_DS>(); 568 update_stage<D3D11_STAGE_CS>(); 569#endif 570 571 if(update_flags & UPDATE_VERTEX_BUFFERS) 572 { 573 while(num_vertex_buffers && !vertex_buffers[num_vertex_buffers - 1].buffer) 574 --num_vertex_buffers; 575 pipe->set_vertex_buffers(pipe, 
num_vertex_buffers, vertex_buffers); 576 } 577 578 update_flags = 0; 579 } 580 581 virtual void STDMETHODCALLTYPE IASetInputLayout( 582 __in_opt ID3D11InputLayout *pInputLayout) 583 { 584 SYNCHRONIZED; 585 if(pInputLayout != input_layout.p) 586 { 587 input_layout = pInputLayout; 588 pipe->bind_vertex_elements_state(pipe, pInputLayout ? ((GalliumD3D11InputLayout*)pInputLayout)->object : default_input_layout); 589 } 590 } 591 592 virtual void STDMETHODCALLTYPE IAGetInputLayout( 593 __out ID3D11InputLayout **ppInputLayout) 594 { 595 SYNCHRONIZED; 596 *ppInputLayout = input_layout.ref(); 597 } 598 599 virtual void STDMETHODCALLTYPE IASetVertexBuffers( 600 __in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - 1) unsigned StartSlot, 601 __in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - StartSlot) unsigned NumBuffers, 602 __in_ecount(NumBuffers) ID3D11Buffer *const *ppVertexBuffers, 603 __in_ecount(NumBuffers) const unsigned *pStrides, 604 __in_ecount(NumBuffers) const unsigned *pOffsets) 605 { 606 SYNCHRONIZED; 607 int last_different = -1; 608 for(unsigned i = 0; i < NumBuffers; ++i) 609 { 610 ID3D11Buffer* buffer = ppVertexBuffers[i]; 611 if(buffer != input_buffers[StartSlot + i].p 612 || vertex_buffers[StartSlot + i].buffer_offset != pOffsets[i] 613 || vertex_buffers[StartSlot + i].stride != pOffsets[i] 614 ) 615 { 616 input_buffers[StartSlot + i] = buffer; 617 vertex_buffers[StartSlot + i].buffer = buffer ? 
((GalliumD3D11Buffer*)buffer)->resource : 0; 618 vertex_buffers[StartSlot + i].buffer_offset = pOffsets[i]; 619 vertex_buffers[StartSlot + i].stride = pStrides[i]; 620 vertex_buffers[StartSlot + i].max_index = ~0; 621 last_different = i; 622 } 623 } 624 if(last_different >= 0) 625 { 626 num_vertex_buffers = std::max(num_vertex_buffers, StartSlot + NumBuffers); 627 update_flags |= UPDATE_VERTEX_BUFFERS; 628 } 629 } 630 631 virtual void STDMETHODCALLTYPE IAGetVertexBuffers( 632 __in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - 1) unsigned StartSlot, 633 __in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - StartSlot) unsigned NumBuffers, 634 __out_ecount_opt(NumBuffers) ID3D11Buffer **ppVertexBuffers, 635 __out_ecount_opt(NumBuffers) unsigned *pStrides, 636 __out_ecount_opt(NumBuffers) unsigned *pOffsets) 637 { 638 SYNCHRONIZED; 639 if(ppVertexBuffers) 640 { 641 for(unsigned i = 0; i < NumBuffers; ++i) 642 ppVertexBuffers[i] = input_buffers[StartSlot + i].ref(); 643 } 644 645 if(pOffsets) 646 { 647 for(unsigned i = 0; i < NumBuffers; ++i) 648 pOffsets[i] = vertex_buffers[StartSlot + i].buffer_offset; 649 } 650 651 if(pStrides) 652 { 653 for(unsigned i = 0; i < NumBuffers; ++i) 654 pStrides[i] = vertex_buffers[StartSlot + i].stride; 655 } 656 } 657 658 void set_index_buffer() 659 { 660 pipe_index_buffer ib; 661 if(!index_buffer) 662 { 663 memset(&ib, 0, sizeof(ib)); 664 } 665 else 666 { 667 if(index_format == DXGI_FORMAT_R32_UINT) 668 ib.index_size = 4; 669 else if(index_format == DXGI_FORMAT_R16_UINT) 670 ib.index_size = 2; 671 else 672 ib.index_size = 1; 673 ib.offset = index_offset; 674 ib.buffer = index_buffer ? 
((GalliumD3D11Buffer*)index_buffer.p)->resource : 0; 675 } 676 pipe->set_index_buffer(pipe, &ib); 677 } 678 679 virtual void STDMETHODCALLTYPE IASetIndexBuffer( 680 __in_opt ID3D11Buffer *pIndexBuffer, 681 __in DXGI_FORMAT Format, 682 __in unsigned Offset) 683 { 684 SYNCHRONIZED; 685 if(index_buffer.p != pIndexBuffer || index_format != Format || index_offset != Offset) 686 { 687 index_buffer = pIndexBuffer; 688 index_format = Format; 689 index_offset = Offset; 690 691 set_index_buffer(); 692 } 693 } 694 695 virtual void STDMETHODCALLTYPE IAGetIndexBuffer( 696 __out_opt ID3D11Buffer **pIndexBuffer, 697 __out_opt DXGI_FORMAT *Format, 698 __out_opt unsigned *Offset) 699 { 700 SYNCHRONIZED; 701 if(pIndexBuffer) 702 *pIndexBuffer = index_buffer.ref(); 703 if(Format) 704 *Format = index_format; 705 if(Offset) 706 *Offset = index_offset; 707 } 708 709 virtual void STDMETHODCALLTYPE IASetPrimitiveTopology( 710 __in D3D11_PRIMITIVE_TOPOLOGY Topology) 711 { 712 SYNCHRONIZED; 713 if(primitive_topology != Topology) 714 { 715 if(Topology < D3D_PRIMITIVE_TOPOLOGY_COUNT) 716 primitive_mode = d3d_to_pipe_prim[Topology]; 717 else 718 primitive_mode = 0; 719 primitive_topology = Topology; 720 } 721 } 722 723 virtual void STDMETHODCALLTYPE IAGetPrimitiveTopology( 724 __out D3D11_PRIMITIVE_TOPOLOGY *pTopology) 725 { 726 SYNCHRONIZED; 727 *pTopology = primitive_topology; 728 } 729 730 virtual void STDMETHODCALLTYPE DrawIndexed( 731 __in unsigned IndexCount, 732 __in unsigned StartIndexLocation, 733 __in int BaseVertexLocation) 734 { 735 SYNCHRONIZED; 736 if(update_flags) 737 update_state(); 738 739 pipe_draw_info info; 740 info.mode = primitive_mode; 741 info.indexed = TRUE; 742 info.count = IndexCount; 743 info.start = StartIndexLocation; 744 info.index_bias = BaseVertexLocation; 745 info.min_index = 0; 746 info.max_index = ~0; 747 info.start_instance = 0; 748 info.instance_count = 1; 749 750 pipe->draw_vbo(pipe, &info); 751 } 752 753 virtual void STDMETHODCALLTYPE Draw( 754 __in 
unsigned VertexCount, 755 __in unsigned StartVertexLocation) 756 { 757 SYNCHRONIZED; 758 if(update_flags) 759 update_state(); 760 761 pipe_draw_info info; 762 info.mode = primitive_mode; 763 info.indexed = FALSE; 764 info.count = VertexCount; 765 info.start = StartVertexLocation; 766 info.index_bias = 0; 767 info.min_index = 0; 768 info.max_index = ~0; 769 info.start_instance = 0; 770 info.instance_count = 1; 771 772 pipe->draw_vbo(pipe, &info); 773 } 774 775 virtual void STDMETHODCALLTYPE DrawIndexedInstanced( 776 __in unsigned IndexCountPerInstance, 777 __in unsigned InstanceCount, 778 __in unsigned StartIndexLocation, 779 __in int BaseVertexLocation, 780 __in unsigned StartInstanceLocation) 781 { 782 SYNCHRONIZED; 783 if(update_flags) 784 update_state(); 785 786 pipe_draw_info info; 787 info.mode = primitive_mode; 788 info.indexed = TRUE; 789 info.count = IndexCountPerInstance; 790 info.start = StartIndexLocation; 791 info.index_bias = BaseVertexLocation; 792 info.min_index = 0; 793 info.max_index = ~0; 794 info.start_instance = StartInstanceLocation; 795 info.instance_count = InstanceCount; 796 797 pipe->draw_vbo(pipe, &info); 798 } 799 800 virtual void STDMETHODCALLTYPE DrawInstanced( 801 __in unsigned VertexCountPerInstance, 802 __in unsigned InstanceCount, 803 __in unsigned StartVertexLocation, 804 __in unsigned StartInstanceLocation) 805 { 806 SYNCHRONIZED; 807 if(update_flags) 808 update_state(); 809 810 pipe_draw_info info; 811 info.mode = primitive_mode; 812 info.indexed = FALSE; 813 info.count = VertexCountPerInstance; 814 info.start = StartVertexLocation; 815 info.index_bias = 0; 816 info.min_index = 0; 817 info.max_index = ~0; 818 info.start_instance = StartInstanceLocation; 819 info.instance_count = InstanceCount; 820 821 pipe->draw_vbo(pipe, &info); 822 } 823 824 virtual void STDMETHODCALLTYPE DrawAuto(void) 825 { 826 if(!caps.so) 827 return; 828 829 SYNCHRONIZED; 830 if(update_flags) 831 update_state(); 832 833 pipe->draw_stream_output(pipe, 
primitive_mode); 834 } 835 836 virtual void STDMETHODCALLTYPE DrawIndexedInstancedIndirect( 837 __in ID3D11Buffer *pBufferForArgs, 838 __in unsigned AlignedByteOffsetForArgs) 839 { 840 SYNCHRONIZED; 841 if(update_flags) 842 update_state(); 843 844 struct { 845 unsigned count; 846 unsigned instance_count; 847 unsigned start; 848 unsigned index_bias; 849 } data; 850 851 pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)pBufferForArgs)->resource, AlignedByteOffsetForArgs, sizeof(data), &data); 852 853 pipe_draw_info info; 854 info.mode = primitive_mode; 855 info.indexed = TRUE; 856 info.start = data.start; 857 info.count = data.count; 858 info.index_bias = data.index_bias; 859 info.min_index = 0; 860 info.max_index = ~0; 861 info.start_instance = 0; 862 info.instance_count = data.instance_count; 863 864 pipe->draw_vbo(pipe, &info); 865 } 866 867 virtual void STDMETHODCALLTYPE DrawInstancedIndirect( 868 __in ID3D11Buffer *pBufferForArgs, 869 __in unsigned AlignedByteOffsetForArgs) 870 { 871 SYNCHRONIZED; 872 if(update_flags) 873 update_state(); 874 875 struct { 876 unsigned count; 877 unsigned instance_count; 878 unsigned start; 879 } data; 880 881 pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)pBufferForArgs)->resource, AlignedByteOffsetForArgs, sizeof(data), &data); 882 883 pipe_draw_info info; 884 info.mode = primitive_mode; 885 info.indexed = FALSE; 886 info.start = data.start; 887 info.count = data.count; 888 info.index_bias = 0; 889 info.min_index = 0; 890 info.max_index = ~0; 891 info.start_instance = 0; 892 info.instance_count = data.instance_count; 893 894 pipe->draw_vbo(pipe, &info); 895 } 896 897#if API >= 11 898 virtual void STDMETHODCALLTYPE Dispatch( 899 __in unsigned ThreadGroupCountX, 900 __in unsigned ThreadGroupCountY, 901 __in unsigned ThreadGroupCountZ) 902 { 903// uncomment this when this is implemented 904// SYNCHRONIZED; 905// if(update_flags) 906// update_state(); 907 } 908 909 virtual void STDMETHODCALLTYPE DispatchIndirect( 910 __in ID3D11Buffer 
*pBufferForArgs, 911 __in unsigned AlignedByteOffsetForArgs) 912 { 913// uncomment this when this is implemented 914// SYNCHRONIZED; 915// if(update_flags) 916// update_state(); 917 } 918#endif 919 920 void set_clip() 921 { 922 pipe_clip_state clip; 923 clip.nr = 0; 924 clip.depth_clamp = depth_clamp; 925 pipe->set_clip_state(pipe, &clip); 926 } 927 928 virtual void STDMETHODCALLTYPE RSSetState( 929 __in_opt ID3D11RasterizerState *pRasterizerState) 930 { 931 SYNCHRONIZED; 932 if(pRasterizerState != rasterizer_state.p) 933 { 934 rasterizer_state = pRasterizerState; 935 pipe->bind_rasterizer_state(pipe, pRasterizerState ? ((GalliumD3D11RasterizerState*)pRasterizerState)->object : default_rasterizer); 936 bool new_depth_clamp = pRasterizerState ? ((GalliumD3D11RasterizerState*)pRasterizerState)->depth_clamp : false; 937 if(depth_clamp != new_depth_clamp) 938 { 939 depth_clamp = new_depth_clamp; 940 set_clip(); 941 } 942 } 943 } 944 945 virtual void STDMETHODCALLTYPE RSGetState( 946 __out ID3D11RasterizerState **ppRasterizerState) 947 { 948 SYNCHRONIZED; 949 *ppRasterizerState = rasterizer_state.ref(); 950 } 951 952 void set_viewport() 953 { 954 // TODO: is depth correct? 
it seems D3D10/11 uses a [-1,1]x[-1,1]x[0,1] cube 955 pipe_viewport_state viewport; 956 float half_width = viewports[0].Width * 0.5f; 957 float half_height = viewports[0].Height * 0.5f; 958 959 viewport.scale[0] = half_width; 960 viewport.scale[1] = -half_height; 961 viewport.scale[2] = (viewports[0].MaxDepth - viewports[0].MinDepth); 962 viewport.scale[3] = 1.0f; 963 viewport.translate[0] = half_width + viewports[0].TopLeftX; 964 viewport.translate[1] = half_height + viewports[0].TopLeftY; 965 viewport.translate[2] = viewports[0].MinDepth; 966 viewport.translate[3] = 1.0f; 967 pipe->set_viewport_state(pipe, &viewport); 968 } 969 970 virtual void STDMETHODCALLTYPE RSSetViewports( 971 __in_range(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) unsigned NumViewports, 972 __in_ecount_opt(NumViewports) const D3D11_VIEWPORT *pViewports) 973 { 974 SYNCHRONIZED; 975 if(NumViewports) 976 { 977 if(memcmp(&viewports[0], &pViewports[0], sizeof(viewports[0]))) 978 { 979 viewports[0] = pViewports[0]; 980 set_viewport(); 981 } 982 for(unsigned i = 1; i < NumViewports; ++i) 983 viewports[i] = pViewports[i]; 984 } 985 else if(num_viewports) 986 { 987 // TODO: what should we do here? 
988 memset(&viewports[0], 0, sizeof(viewports[0])); 989 set_viewport(); 990 } 991 num_viewports = NumViewports; 992 } 993 994 virtual void STDMETHODCALLTYPE RSGetViewports( 995 __inout_range(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) unsigned *pNumViewports, 996 __out_ecount_opt(*pNumViewports) D3D11_VIEWPORT *pViewports) 997 { 998 SYNCHRONIZED; 999 if(pViewports) 1000 { 1001 unsigned i; 1002 for(i = 0; i < std::min(*pNumViewports, num_viewports); ++i) 1003 pViewports[i] = viewports[i]; 1004 1005 memset(pViewports + i, 0, (*pNumViewports - i) * sizeof(D3D11_VIEWPORT)); 1006 } 1007 1008 *pNumViewports = num_viewports; 1009 } 1010 1011 void set_scissor() 1012 { 1013 pipe_scissor_state scissor; 1014 scissor.minx = scissor_rects[0].left; 1015 scissor.miny = scissor_rects[0].top; 1016 scissor.maxx = scissor_rects[0].right; 1017 scissor.maxy = scissor_rects[0].bottom; 1018 pipe->set_scissor_state(pipe, &scissor); 1019 } 1020 1021 virtual void STDMETHODCALLTYPE RSSetScissorRects( 1022 __in_range(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) unsigned NumRects, 1023 __in_ecount_opt(NumRects) const D3D11_RECT *pRects) 1024 { 1025 SYNCHRONIZED; 1026 if(NumRects) 1027 { 1028 if(memcmp(&scissor_rects[0], &pRects[0], sizeof(scissor_rects[0]))) 1029 { 1030 scissor_rects[0] = pRects[0]; 1031 set_scissor(); 1032 } 1033 for(unsigned i = 1; i < NumRects; ++i) 1034 scissor_rects[i] = pRects[i]; 1035 } 1036 else if(num_scissor_rects) 1037 { 1038 // TODO: what should we do here? 
1039 memset(&scissor_rects[0], 0, sizeof(scissor_rects[0])); 1040 set_scissor(); 1041 } 1042 1043 num_scissor_rects = NumRects; 1044 } 1045 1046 virtual void STDMETHODCALLTYPE RSGetScissorRects( 1047 __inout_range(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) unsigned *pNumRects, 1048 __out_ecount_opt(*pNumRects) D3D11_RECT *pRects) 1049 { 1050 SYNCHRONIZED; 1051 if(pRects) 1052 { 1053 unsigned i; 1054 for(i = 0; i < std::min(*pNumRects, num_scissor_rects); ++i) 1055 pRects[i] = scissor_rects[i]; 1056 1057 memset(pRects + i, 0, (*pNumRects - i) * sizeof(D3D11_RECT)); 1058 } 1059 1060 *pNumRects = num_scissor_rects; 1061 } 1062 1063 virtual void STDMETHODCALLTYPE OMSetBlendState( 1064 __in_opt ID3D11BlendState *pBlendState, 1065 __in_opt const float BlendFactor[ 4 ], 1066 __in unsigned SampleMask) 1067 { 1068 SYNCHRONIZED; 1069 float white[4] = {1.0f, 1.0f, 1.0f, 1.0f}; 1070 1071 if(blend_state.p != pBlendState) 1072 { 1073 pipe->bind_blend_state(pipe, pBlendState ? ((GalliumD3D11BlendState*)pBlendState)->object : default_blend); 1074 blend_state = pBlendState; 1075 } 1076 1077 // Windows D3D11 does this, even though it's apparently undocumented 1078 if(!BlendFactor) 1079 BlendFactor = white; 1080 1081 if(memcmp(blend_color, BlendFactor, sizeof(blend_color))) 1082 { 1083 pipe->set_blend_color(pipe, (struct pipe_blend_color*)BlendFactor); 1084 memcpy(blend_color, BlendFactor, sizeof(blend_color)); 1085 } 1086 1087 if(sample_mask != SampleMask) 1088 { 1089 pipe->set_sample_mask(pipe, sample_mask); 1090 sample_mask = SampleMask; 1091 } 1092 } 1093 1094 virtual void STDMETHODCALLTYPE OMGetBlendState( 1095 __out_opt ID3D11BlendState **ppBlendState, 1096 __out_opt float BlendFactor[ 4 ], 1097 __out_opt unsigned *pSampleMask) 1098 { 1099 SYNCHRONIZED; 1100 if(ppBlendState) 1101 *ppBlendState = blend_state.ref(); 1102 if(BlendFactor) 1103 memcpy(BlendFactor, blend_color, sizeof(blend_color)); 1104 if(pSampleMask) 1105 *pSampleMask = sample_mask; 1106 } 1107 
1108 void set_stencil_ref() 1109 { 1110 struct pipe_stencil_ref sref; 1111 sref.ref_value[0] = stencil_ref; 1112 sref.ref_value[1] = stencil_ref; 1113 pipe->set_stencil_ref(pipe, &sref); 1114 } 1115 1116 virtual void STDMETHODCALLTYPE OMSetDepthStencilState( 1117 __in_opt ID3D11DepthStencilState *pDepthStencilState, 1118 __in unsigned StencilRef) 1119 { 1120 SYNCHRONIZED; 1121 if(pDepthStencilState != depth_stencil_state.p) 1122 { 1123 pipe->bind_depth_stencil_alpha_state(pipe, pDepthStencilState ? ((GalliumD3D11DepthStencilState*)pDepthStencilState)->object : default_depth_stencil); 1124 depth_stencil_state = pDepthStencilState; 1125 } 1126 1127 if(StencilRef != stencil_ref) 1128 { 1129 stencil_ref = StencilRef; 1130 set_stencil_ref(); 1131 } 1132 } 1133 1134 virtual void STDMETHODCALLTYPE OMGetDepthStencilState( 1135 __out_opt ID3D11DepthStencilState **ppDepthStencilState, 1136 __out_opt unsigned *pStencilRef) 1137 { 1138 SYNCHRONIZED; 1139 if(*ppDepthStencilState) 1140 *ppDepthStencilState = depth_stencil_state.ref(); 1141 if(pStencilRef) 1142 *pStencilRef = stencil_ref; 1143 } 1144 1145 void set_framebuffer() 1146 { 1147 struct pipe_framebuffer_state fb; 1148 memset(&fb, 0, sizeof(fb)); 1149 if(depth_stencil_view) 1150 { 1151 struct pipe_surface* surf = ((GalliumD3D11DepthStencilView*)depth_stencil_view.p)->object; 1152 fb.zsbuf = surf; 1153 if(surf->width > fb.width) 1154 fb.width = surf->width; 1155 if(surf->height > fb.height) 1156 fb.height = surf->height; 1157 } 1158 fb.nr_cbufs = num_render_target_views; 1159 unsigned i; 1160 for(i = 0; i < num_render_target_views; ++i) 1161 { 1162 if(render_target_views[i]) 1163 { 1164 struct pipe_surface* surf = ((GalliumD3D11RenderTargetView*)render_target_views[i].p)->object; 1165 fb.cbufs[i] = surf; 1166 if(surf->width > fb.width) 1167 fb.width = surf->width; 1168 if(surf->height > fb.height) 1169 fb.height = surf->height; 1170 } 1171 } 1172 1173 pipe->set_framebuffer_state(pipe, &fb); 1174 } 1175 1176 /* TODO: the 
docs say that we should unbind conflicting resources (e.g. those bound for read while we are binding them for write too), but we aren't. 1177 * Hopefully nobody relies on this happening 1178 */ 1179 1180 virtual void STDMETHODCALLTYPE OMSetRenderTargets( 1181 __in_range(0, D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT) unsigned NumViews, 1182 __in_ecount_opt(NumViews) ID3D11RenderTargetView *const *ppRenderTargetViews, 1183 __in_opt ID3D11DepthStencilView *pDepthStencilView) 1184 { 1185 SYNCHRONIZED; 1186 if(!ppRenderTargetViews) 1187 NumViews = 0; 1188 if(NumViews == num_render_target_views) 1189 { 1190 for(unsigned i = 0; i < NumViews; ++i) 1191 { 1192 if(ppRenderTargetViews[i] != render_target_views[i].p) 1193 goto changed; 1194 } 1195 return; 1196 } 1197changed: 1198 depth_stencil_view = pDepthStencilView; 1199 unsigned i; 1200 for(i = 0; i < NumViews; ++i) 1201 { 1202 render_target_views[i] = ppRenderTargetViews[i]; 1203#if API >= 11 1204 om_unordered_access_views[i] = (ID3D11UnorderedAccessView*)NULL; 1205#endif 1206 } 1207 for(; i < num_render_target_views; ++i) 1208 render_target_views[i] = (ID3D11RenderTargetView*)NULL; 1209 num_render_target_views = NumViews; 1210 set_framebuffer(); 1211 } 1212 1213 virtual void STDMETHODCALLTYPE OMGetRenderTargets( 1214 __in_range(0, D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT) unsigned NumViews, 1215 __out_ecount_opt(NumViews) ID3D11RenderTargetView **ppRenderTargetViews, 1216 __out_opt ID3D11DepthStencilView **ppDepthStencilView) 1217 { 1218 SYNCHRONIZED; 1219 if(ppRenderTargetViews) 1220 { 1221 unsigned i; 1222 for(i = 0; i < std::min(num_render_target_views, NumViews); ++i) 1223 ppRenderTargetViews[i] = render_target_views[i].ref(); 1224 1225 for(; i < NumViews; ++i) 1226 ppRenderTargetViews[i] = 0; 1227 } 1228 1229 if(ppDepthStencilView) 1230 *ppDepthStencilView = depth_stencil_view.ref(); 1231 } 1232 1233#if API >= 11 1234 /* TODO: what is this supposed to do _exactly_? are we doing the right thing? 
*/ 1235 virtual void STDMETHODCALLTYPE OMSetRenderTargetsAndUnorderedAccessViews( 1236 __in unsigned NumRTVs, 1237 __in_ecount_opt(NumRTVs) ID3D11RenderTargetView *const *ppRenderTargetViews, 1238 __in_opt ID3D11DepthStencilView *pDepthStencilView, 1239 __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - 1) unsigned UAVStartSlot, 1240 __in unsigned NumUAVs, 1241 __in_ecount_opt(NumUAVs) ID3D11UnorderedAccessView *const *ppUnorderedAccessViews, 1242 __in_ecount_opt(NumUAVs) const unsigned *pUAVInitialCounts) 1243 { 1244 SYNCHRONIZED; 1245 if(NumRTVs != D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL) 1246 OMSetRenderTargets(NumRTVs, ppRenderTargetViews, pDepthStencilView); 1247 1248 if(NumUAVs != D3D11_KEEP_UNORDERED_ACCESS_VIEWS) 1249 { 1250 for(unsigned i = 0; i < NumUAVs; ++i) 1251 { 1252 om_unordered_access_views[UAVStartSlot + i] = ppUnorderedAccessViews[i]; 1253 render_target_views[UAVStartSlot + i] = (ID3D11RenderTargetView*)0; 1254 } 1255 } 1256 } 1257 1258 virtual void STDMETHODCALLTYPE OMGetRenderTargetsAndUnorderedAccessViews( 1259 __in_range(0, D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT) unsigned NumRTVs, 1260 __out_ecount_opt(NumRTVs) ID3D11RenderTargetView **ppRenderTargetViews, 1261 __out_opt ID3D11DepthStencilView **ppDepthStencilView, 1262 __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - 1) unsigned UAVStartSlot, 1263 __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - UAVStartSlot) unsigned NumUAVs, 1264 __out_ecount_opt(NumUAVs) ID3D11UnorderedAccessView **ppUnorderedAccessViews) 1265 { 1266 SYNCHRONIZED; 1267 if(ppRenderTargetViews) 1268 OMGetRenderTargets(NumRTVs, ppRenderTargetViews, ppDepthStencilView); 1269 1270 if(ppUnorderedAccessViews) 1271 { 1272 for(unsigned i = 0; i < NumUAVs; ++i) 1273 ppUnorderedAccessViews[i] = om_unordered_access_views[UAVStartSlot + i].ref(); 1274 } 1275 } 1276#endif 1277 1278 virtual void STDMETHODCALLTYPE SOSetTargets( 1279 __in_range(0, D3D11_SO_BUFFER_SLOT_COUNT) unsigned NumBuffers, 1280 __in_ecount_opt(NumBuffers) ID3D11Buffer 
*const *ppSOTargets, 1281 __in_ecount_opt(NumBuffers) const unsigned *pOffsets) 1282 { 1283 SYNCHRONIZED; 1284 unsigned i; 1285 if(!ppSOTargets) 1286 NumBuffers = 0; 1287 bool changed = false; 1288 for(i = 0; i < NumBuffers; ++i) 1289 { 1290 ID3D11Buffer* buffer = ppSOTargets[i]; 1291 if(buffer != so_targets[i].p || pOffsets[i] != so_offsets[i]) 1292 { 1293 so_buffers[i] = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0; 1294 so_targets[i] = buffer; 1295 so_offsets[i] = pOffsets[i]; 1296 changed = true; 1297 } 1298 } 1299 for(; i < D3D11_SO_BUFFER_SLOT_COUNT; ++i) 1300 { 1301 if(so_targets[i].p || so_offsets[i]) 1302 { 1303 changed = true; 1304 so_targets[i] = (ID3D11Buffer*)0; 1305 so_offsets[i] = 0; 1306 } 1307 } 1308 num_so_targets = NumBuffers; 1309 1310 if(changed && caps.so) 1311 pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets); 1312 } 1313 1314 virtual void STDMETHODCALLTYPE SOGetTargets( 1315 __in_range(0, D3D11_SO_BUFFER_SLOT_COUNT) unsigned NumBuffers, 1316 __out_ecount(NumBuffers) ID3D11Buffer **ppSOTargets 1317#if API < 11 1318 , __out_ecount(NumBuffers) UINT *pOffsets 1319#endif 1320 ) 1321 { 1322 SYNCHRONIZED; 1323 for(unsigned i = 0; i < NumBuffers; ++i) 1324 { 1325 ppSOTargets[i] = so_targets[i].ref(); 1326#if API < 11 1327 pOffsets[i] = so_offsets[i]; 1328#endif 1329 } 1330 } 1331 1332 virtual void STDMETHODCALLTYPE Begin( 1333 __in ID3D11Asynchronous *pAsync) 1334 { 1335 SYNCHRONIZED; 1336 if(caps.queries) 1337 pipe->begin_query(pipe, ((GalliumD3D11Asynchronous<>*)pAsync)->query); 1338 } 1339 1340 virtual void STDMETHODCALLTYPE End( 1341 __in ID3D11Asynchronous *pAsync) 1342 { 1343 SYNCHRONIZED; 1344 if(caps.queries) 1345 pipe->end_query(pipe, ((GalliumD3D11Asynchronous<>*)pAsync)->query); 1346 } 1347 1348 virtual HRESULT STDMETHODCALLTYPE GetData( 1349 __in ID3D11Asynchronous *pAsync, 1350 __out_bcount_opt(DataSize) void *pData, 1351 __in unsigned DataSize, 1352 __in unsigned GetDataFlags) 1353 { 1354 
SYNCHRONIZED; 1355 if(!caps.queries) 1356 return E_NOTIMPL; 1357 1358 GalliumD3D11Asynchronous<>* async = (GalliumD3D11Asynchronous<>*)pAsync; 1359 void* data = alloca(async->data_size); 1360 boolean ret = pipe->get_query_result(pipe, ((GalliumD3D11Asynchronous<>*)pAsync)->query, !(GetDataFlags & D3D11_ASYNC_GETDATA_DONOTFLUSH), data); 1361 if(pData) 1362 memcpy(pData, data, std::min(async->data_size, DataSize)); 1363 return ret ? S_OK : S_FALSE; 1364 } 1365 1366 void set_render_condition() 1367 { 1368 if(caps.render_condition) 1369 { 1370 if(!render_predicate) 1371 pipe->render_condition(pipe, 0, 0); 1372 else 1373 { 1374 GalliumD3D11Predicate* predicate = (GalliumD3D11Predicate*)render_predicate.p; 1375 if(!render_predicate_value && predicate->desc.Query == D3D11_QUERY_OCCLUSION_PREDICATE) 1376 { 1377 unsigned mode = (predicate->desc.MiscFlags & D3D11_QUERY_MISC_PREDICATEHINT) ? PIPE_RENDER_COND_NO_WAIT : PIPE_RENDER_COND_WAIT; 1378 pipe->render_condition(pipe, predicate->query, mode); 1379 } 1380 else 1381 { 1382 /* TODO: add inverted predication to Gallium*/ 1383 pipe->render_condition(pipe, 0, 0); 1384 } 1385 } 1386 } 1387 } 1388 1389 virtual void STDMETHODCALLTYPE SetPredication( 1390 __in_opt ID3D11Predicate *pPredicate, 1391 __in BOOL PredicateValue) 1392 { 1393 SYNCHRONIZED; 1394 if(render_predicate.p != pPredicate || render_predicate_value != PredicateValue) 1395 { 1396 render_predicate = pPredicate; 1397 render_predicate_value = PredicateValue; 1398 set_render_condition(); 1399 } 1400 } 1401 1402 virtual void STDMETHODCALLTYPE GetPredication( 1403 __out_opt ID3D11Predicate **ppPredicate, 1404 __out_opt BOOL *pPredicateValue) 1405 { 1406 SYNCHRONIZED; 1407 if(ppPredicate) 1408 *ppPredicate = render_predicate.ref(); 1409 if(pPredicateValue) 1410 *pPredicateValue = render_predicate_value; 1411 } 1412 1413 static pipe_subresource d3d11_to_pipe_subresource(struct pipe_resource* resource, unsigned subresource) 1414 { 1415 pipe_subresource sr; 1416 
if(subresource <= resource->last_level) 1417 { 1418 sr.level = subresource; 1419 sr.face = 0; 1420 } 1421 else 1422 { 1423 unsigned levels = resource->last_level + 1; 1424 sr.level = subresource % levels; 1425 sr.face = subresource / levels; 1426 } 1427 return sr; 1428 } 1429 1430 virtual HRESULT STDMETHODCALLTYPE Map( 1431 __in ID3D11Resource *pResource, 1432 __in unsigned Subresource, 1433 __in D3D11_MAP MapType, 1434 __in unsigned MapFlags, 1435 __out D3D11_MAPPED_SUBRESOURCE *pMappedResource) 1436 { 1437 SYNCHRONIZED; 1438 GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)pResource; 1439 if(resource->transfers.count(Subresource)) 1440 return E_FAIL; 1441 pipe_subresource sr = d3d11_to_pipe_subresource(resource->resource, Subresource); 1442 pipe_box box; 1443 d3d11_to_pipe_box(resource->resource, sr.level, 0); 1444 unsigned usage = 0; 1445 if(MapType == D3D11_MAP_READ) 1446 usage = PIPE_TRANSFER_READ; 1447 else if(MapType == D3D11_MAP_WRITE) 1448 usage = PIPE_TRANSFER_WRITE; 1449 else if(MapType == D3D11_MAP_READ_WRITE) 1450 usage = PIPE_TRANSFER_READ_WRITE; 1451 else if(MapType == D3D11_MAP_WRITE_DISCARD) 1452 usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD; 1453 else if(MapType == D3D11_MAP_WRITE_NO_OVERWRITE) 1454 usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_NOOVERWRITE; 1455 else 1456 return E_INVALIDARG; 1457 if(MapType & D3D10_MAP_FLAG_DO_NOT_WAIT) 1458 usage |= PIPE_TRANSFER_DONTBLOCK; 1459 struct pipe_transfer* transfer = pipe->get_transfer(pipe, resource->resource, sr, usage, &box); 1460 if(!transfer) { 1461 if(MapType & D3D10_MAP_FLAG_DO_NOT_WAIT) 1462 return DXGI_ERROR_WAS_STILL_DRAWING; 1463 else 1464 return E_FAIL; 1465 } 1466 resource->transfers[Subresource] = transfer; 1467 pipe->transfer_map(pipe, transfer); 1468 pMappedResource->pData = transfer->data; 1469 pMappedResource->RowPitch = transfer->stride; 1470 pMappedResource->DepthPitch = transfer->slice_stride; 1471 return S_OK; 1472 } 1473 1474 virtual void STDMETHODCALLTYPE Unmap( 
1475 __in ID3D11Resource *pResource, 1476 __in unsigned Subresource) 1477 { 1478 SYNCHRONIZED; 1479 GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)pResource; 1480 std::unordered_map<unsigned, pipe_transfer*>::iterator i = resource->transfers.find(Subresource); 1481 if(i != resource->transfers.end()) 1482 { 1483 pipe->transfer_unmap(pipe, i->second); 1484 pipe->transfer_destroy(pipe, i->second); 1485 resource->transfers.erase(i); 1486 } 1487 } 1488 1489 virtual void STDMETHODCALLTYPE CopySubresourceRegion( 1490 __in ID3D11Resource *pDstResource, 1491 __in unsigned DstSubresource, 1492 __in unsigned DstX, 1493 __in unsigned DstY, 1494 __in unsigned DstZ, 1495 __in ID3D11Resource *pSrcResource, 1496 __in unsigned SrcSubresource, 1497 __in_opt const D3D11_BOX *pSrcBox) 1498 { 1499 SYNCHRONIZED; 1500 GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)pDstResource; 1501 GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)pSrcResource; 1502 pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, DstSubresource); 1503 pipe_subresource subsrc = d3d11_to_pipe_subresource(src->resource, SrcSubresource); 1504 pipe_box box = d3d11_to_pipe_box(src->resource, subsrc.level, pSrcBox); 1505 for(unsigned i = 0; i < box.depth; ++i) 1506 { 1507 pipe->resource_copy_region(pipe, 1508 dst->resource, subdst, DstX, DstY, DstZ + i, 1509 src->resource, subsrc, box.x, box.y, box.z + i, 1510 box.width, box.height); 1511 } 1512 } 1513 1514 virtual void STDMETHODCALLTYPE CopyResource( 1515 __in ID3D11Resource *pDstResource, 1516 __in ID3D11Resource *pSrcResource) 1517 { 1518 SYNCHRONIZED; 1519 GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)pDstResource; 1520 GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)pSrcResource; 1521 pipe_subresource sr; 1522 unsigned faces = dst->resource->target == PIPE_TEXTURE_CUBE ? 
6 : 1; 1523 1524 for(sr.face = 0; sr.face < faces; ++sr.face) 1525 { 1526 for(sr.level = 0; sr.level <= dst->resource->last_level; ++sr.level) 1527 { 1528 unsigned w = u_minify(dst->resource->width0, sr.level); 1529 unsigned h = u_minify(dst->resource->height0, sr.level); 1530 unsigned d = u_minify(dst->resource->depth0, sr.level); 1531 for(unsigned i = 0; i < d; ++i) 1532 { 1533 pipe->resource_copy_region(pipe, 1534 dst->resource, sr, 0, 0, i, 1535 src->resource, sr, 0, 0, i, 1536 w, h); 1537 } 1538 } 1539 } 1540 } 1541 1542 virtual void STDMETHODCALLTYPE UpdateSubresource( 1543 __in ID3D11Resource *pDstResource, 1544 __in unsigned DstSubresource, 1545 __in_opt const D3D11_BOX *pDstBox, 1546 __in const void *pSrcData, 1547 __in unsigned SrcRowPitch, 1548 __in unsigned SrcDepthPitch) 1549 { 1550 SYNCHRONIZED; 1551 GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)pDstResource; 1552 pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, DstSubresource); 1553 pipe_box box = d3d11_to_pipe_box(dst->resource, subdst.level, pDstBox); 1554 pipe->transfer_inline_write(pipe, dst->resource, subdst, PIPE_TRANSFER_WRITE, &box, pSrcData, SrcRowPitch, SrcDepthPitch); 1555 } 1556 1557#if API >= 11 1558 virtual void STDMETHODCALLTYPE CopyStructureCount( 1559 __in ID3D11Buffer *pDstBuffer, 1560 __in unsigned DstAlignedByteOffset, 1561 __in ID3D11UnorderedAccessView *pSrcView) 1562 { 1563 SYNCHRONIZED; 1564 } 1565#endif 1566 1567 virtual void STDMETHODCALLTYPE ClearRenderTargetView( 1568 __in ID3D11RenderTargetView *pRenderTargetView, 1569 __in const float ColorRGBA[4]) 1570 { 1571 SYNCHRONIZED; 1572 GalliumD3D11RenderTargetView* view = ((GalliumD3D11RenderTargetView*)pRenderTargetView); 1573 pipe->clear_render_target(pipe, view->object, ColorRGBA, 0, 0, view->object->width, view->object->height); 1574 } 1575 1576 virtual void STDMETHODCALLTYPE ClearDepthStencilView( 1577 __in ID3D11DepthStencilView *pDepthStencilView, 1578 __in unsigned ClearFlags, 1579 __in float 
Depth, 1580 __in UINT8 Stencil) 1581 { 1582 SYNCHRONIZED; 1583 GalliumD3D11DepthStencilView* view = ((GalliumD3D11DepthStencilView*)pDepthStencilView); 1584 unsigned flags = 0; 1585 if(ClearFlags & D3D11_CLEAR_DEPTH) 1586 flags |= PIPE_CLEAR_DEPTH; 1587 if(ClearFlags & D3D11_CLEAR_STENCIL) 1588 flags |= PIPE_CLEAR_STENCIL; 1589 pipe->clear_depth_stencil(pipe, view->object, flags, Depth, Stencil, 0, 0, view->object->width, view->object->height); 1590 } 1591 1592#if API >= 11 1593 virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewUint( 1594 __in ID3D11UnorderedAccessView *pUnorderedAccessView, 1595 __in const unsigned Values[ 4 ]) 1596 { 1597 SYNCHRONIZED; 1598 } 1599 1600 virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewFloat( 1601 __in ID3D11UnorderedAccessView *pUnorderedAccessView, 1602 __in const float Values[ 4 ]) 1603 { 1604 SYNCHRONIZED; 1605 } 1606#endif 1607 1608 void restore_gallium_state_blit_only() 1609 { 1610 pipe->bind_blend_state(pipe, blend_state.p ? blend_state.p->object : default_blend); 1611 pipe->bind_depth_stencil_alpha_state(pipe, depth_stencil_state.p ? depth_stencil_state.p->object : default_depth_stencil); 1612 pipe->bind_rasterizer_state(pipe, rasterizer_state.p ? rasterizer_state.p->object : default_rasterizer); 1613 pipe->bind_vertex_elements_state(pipe, input_layout.p ? input_layout.p->object : default_input_layout); 1614 pipe->bind_fs_state(pipe, shaders[D3D11_STAGE_PS].p ? shaders[D3D11_STAGE_PS].p->object : default_shaders[PIPE_SHADER_FRAGMENT]); 1615 pipe->bind_vs_state(pipe, shaders[D3D11_STAGE_VS].p ? shaders[D3D11_STAGE_VS].p->object : default_shaders[PIPE_SHADER_VERTEX]); 1616 if(caps.gs) 1617 pipe->bind_gs_state(pipe, shaders[D3D11_STAGE_GS].p ? 
shaders[D3D11_STAGE_GS].p->object : default_shaders[PIPE_SHADER_GEOMETRY]); 1618 set_framebuffer(); 1619 set_viewport(); 1620 set_clip(); 1621 set_render_condition(); 1622 // TODO: restore stream output 1623 1624 update_flags |= UPDATE_VERTEX_BUFFERS | (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_PS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_PS)); 1625 } 1626 1627 virtual void STDMETHODCALLTYPE RestoreGalliumStateBlitOnly() 1628 { 1629 SYNCHRONIZED; 1630 restore_gallium_state_blit_only(); 1631 } 1632 1633 virtual void STDMETHODCALLTYPE GenerateMips( 1634 __in ID3D11ShaderResourceView *pShaderResourceView) 1635 { 1636 SYNCHRONIZED; 1637 1638 GalliumD3D11ShaderResourceView* view = (GalliumD3D11ShaderResourceView*)pShaderResourceView; 1639 if(caps.gs) 1640 pipe->bind_gs_state(pipe, 0); 1641 if(caps.so) 1642 pipe->bind_stream_output_state(pipe, 0); 1643 if(pipe->render_condition) 1644 pipe->render_condition(pipe, 0, 0); 1645 util_gen_mipmap(gen_mipmap, view->object, 0, 0, view->object->texture->last_level, PIPE_TEX_FILTER_LINEAR); 1646 restore_gallium_state_blit_only(); 1647 } 1648 1649 virtual void STDMETHODCALLTYPE RestoreGalliumState() 1650 { 1651 SYNCHRONIZED; 1652 restore_gallium_state_blit_only(); 1653 1654 set_index_buffer(); 1655 set_stencil_ref(); 1656 pipe->set_blend_color(pipe, (struct pipe_blend_color*)blend_color); 1657 pipe->set_sample_mask(pipe, sample_mask); 1658 1659 for(unsigned s = 0; s < 3; ++s) 1660 { 1661 unsigned num = std::min(caps.constant_buffers[s], (unsigned)D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT); 1662 for(unsigned i = 0; i < num; ++i) 1663 pipe->set_constant_buffer(pipe, s, i, constant_buffers[s][i].p ? 
constant_buffers[s][i].p->resource : 0); 1664 } 1665 1666 if(caps.so) 1667 pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets); 1668 1669 update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_VS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_VS)); 1670 update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_GS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_GS)); 1671 1672 set_scissor(); 1673 } 1674 1675#if API >= 11 1676 /* TODO: hack SRVs or sampler states to handle this, or add to Gallium */ 1677 virtual void STDMETHODCALLTYPE SetResourceMinLOD( 1678 __in ID3D11Resource *pResource, 1679 float MinLOD) 1680 { 1681 SYNCHRONIZED; 1682 GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)pResource; 1683 if(resource->min_lod != MinLOD) 1684 { 1685 // TODO: actually do anything? 1686 resource->min_lod = MinLOD; 1687 } 1688 } 1689 1690 virtual float STDMETHODCALLTYPE GetResourceMinLOD( 1691 __in ID3D11Resource *pResource) 1692 { 1693 SYNCHRONIZED; 1694 GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)pResource; 1695 return resource->min_lod; 1696 } 1697#endif 1698 1699 virtual void STDMETHODCALLTYPE ResolveSubresource( 1700 __in ID3D11Resource *pDstResource, 1701 __in unsigned DstSubresource, 1702 __in ID3D11Resource *pSrcResource, 1703 __in unsigned SrcSubresource, 1704 __in DXGI_FORMAT Format) 1705 { 1706 SYNCHRONIZED; 1707 GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)pDstResource; 1708 GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)pSrcResource; 1709 pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, DstSubresource); 1710 pipe_subresource subsrc = d3d11_to_pipe_subresource(src->resource, SrcSubresource); 1711 pipe->resource_resolve(pipe, dst->resource, subdst, src->resource, subsrc); 1712 } 1713 1714#if API >= 11 1715 virtual void STDMETHODCALLTYPE ExecuteCommandList( 1716 __in ID3D11CommandList *pCommandList, 1717 BOOL RestoreContextState) 1718 { 1719 SYNCHRONIZED; 1720 } 1721 1722 
virtual HRESULT STDMETHODCALLTYPE FinishCommandList( 1723 BOOL RestoreDeferredContextState, 1724 __out_opt ID3D11CommandList **ppCommandList) 1725 { 1726 SYNCHRONIZED; 1727 return E_NOTIMPL; 1728 } 1729#endif 1730 1731 virtual void STDMETHODCALLTYPE ClearState(void) 1732 { 1733 /* we don't take a lock here because we would deadlock otherwise 1734 * TODO: this is probably incorrect, because ClearState should likely be atomic. 1735 * However, I can't think of any correct usage that would be affected by this 1736 * being non-atomic, and making this atomic is quite expensive and complicates 1737 * the code 1738 */ 1739 1740 // we qualify all calls so that we avoid virtual dispatch and might get them inlined 1741 // TODO: make sure all this gets inlined, which might require more compiler flags 1742 // TODO: optimize this 1743#if API >= 11 1744 GalliumD3D11DeviceContext::PSSetShader(0, 0, 0); 1745 GalliumD3D11DeviceContext::GSSetShader(0, 0, 0); 1746 GalliumD3D11DeviceContext::VSSetShader(0, 0, 0); 1747 GalliumD3D11DeviceContext::HSSetShader(0, 0, 0); 1748 GalliumD3D11DeviceContext::DSSetShader(0, 0, 0); 1749 GalliumD3D11DeviceContext::CSSetShader(0, 0, 0); 1750#else 1751 GalliumD3D11DeviceContext::PSSetShader(0); 1752 GalliumD3D11DeviceContext::GSSetShader(0); 1753 GalliumD3D11DeviceContext::VSSetShader(0); 1754#endif 1755 1756 GalliumD3D11DeviceContext::IASetInputLayout(0); 1757 GalliumD3D11DeviceContext::IASetIndexBuffer(0, DXGI_FORMAT_UNKNOWN, 0); 1758 GalliumD3D11DeviceContext::RSSetState(0); 1759 GalliumD3D11DeviceContext::OMSetDepthStencilState(0, 0); 1760 GalliumD3D11DeviceContext::OMSetBlendState(0, (float*)zero_data, ~0); 1761 GalliumD3D11DeviceContext::SetPredication(0, 0); 1762 GalliumD3D11DeviceContext::IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_UNDEFINED); 1763 1764 GalliumD3D11DeviceContext::PSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data); 1765 GalliumD3D11DeviceContext::GSSetConstantBuffers(0, 
D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data); 1766 GalliumD3D11DeviceContext::VSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data); 1767#if API >= 11 1768 GalliumD3D11DeviceContext::HSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data); 1769 GalliumD3D11DeviceContext::DSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data); 1770 GalliumD3D11DeviceContext::CSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data); 1771#endif 1772 1773 GalliumD3D11DeviceContext::IASetVertexBuffers(0, num_vertex_buffers, (ID3D11Buffer**)zero_data, (unsigned*)zero_data, (unsigned*)zero_data); 1774#if API >= 11 1775 GalliumD3D11DeviceContext::OMSetRenderTargetsAndUnorderedAccessViews(0, 0, 0 , 0, 0, 0, 0); 1776#else 1777 GalliumD3D11DeviceContext::OMSetRenderTargets(0, 0, 0 ); 1778#endif 1779 GalliumD3D11DeviceContext::SOSetTargets(0, 0, 0); 1780 1781 GalliumD3D11DeviceContext::PSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11ShaderResourceView**)zero_data); 1782 GalliumD3D11DeviceContext::GSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11ShaderResourceView**)zero_data); 1783 GalliumD3D11DeviceContext::VSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11ShaderResourceView**)zero_data); 1784#if API >= 11 1785 GalliumD3D11DeviceContext::HSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11ShaderResourceView**)zero_data); 1786 GalliumD3D11DeviceContext::DSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11ShaderResourceView**)zero_data); 1787 GalliumD3D11DeviceContext::CSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11ShaderResourceView**)zero_data); 1788#endif 1789 1790 GalliumD3D11DeviceContext::PSSetSamplers(0, 
num_shader_resource_views[D3D11_STAGE_PS], (ID3D11SamplerState**)zero_data); 1791 GalliumD3D11DeviceContext::GSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11SamplerState**)zero_data); 1792 GalliumD3D11DeviceContext::VSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11SamplerState**)zero_data); 1793#if API >= 11 1794 GalliumD3D11DeviceContext::HSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11SamplerState**)zero_data); 1795 GalliumD3D11DeviceContext::DSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11SamplerState**)zero_data); 1796 GalliumD3D11DeviceContext::CSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11SamplerState**)zero_data); 1797#endif 1798 1799 GalliumD3D11DeviceContext::RSSetViewports(0, 0); 1800 GalliumD3D11DeviceContext::RSSetScissorRects(0, 0); 1801 } 1802 1803 virtual void STDMETHODCALLTYPE Flush(void) 1804 { 1805 SYNCHRONIZED; 1806 pipe->flush(pipe, PIPE_FLUSH_FRAME, 0); 1807 } 1808 1809 /* In Direct3D 10, if the reference count of an object drops to 0, it is automatically 1810 * cleanly unbound from the pipeline. 1811 * In Direct3D 11, the pipeline holds a reference. 1812 * 1813 * Note that instead of always scanning the pipeline on destruction, we could 1814 * maintain the internal reference count on DirectX 10 and use it to check if an 1815 * object is still bound. 1816 * Presumably, on average, scanning is faster if the application is well written. 
1817 */ 1818#if API < 11 1819#define IMPLEMENT_SIMPLE_UNBIND(name, member, gallium, def) \ 1820 void Unbind##name(ID3D11##name* state) \ 1821 { \ 1822 SYNCHRONIZED; \ 1823 if((void*)state == (void*)member.p) \ 1824 { \ 1825 member.p = 0; \ 1826 pipe->bind_##gallium##_state(pipe, default_##def); \ 1827 } \ 1828 } 1829 IMPLEMENT_SIMPLE_UNBIND(BlendState, blend_state, blend, blend) 1830 IMPLEMENT_SIMPLE_UNBIND(RasterizerState, rasterizer_state, rasterizer, rasterizer) 1831 IMPLEMENT_SIMPLE_UNBIND(DepthStencilState, depth_stencil_state, depth_stencil_alpha, depth_stencil) 1832 IMPLEMENT_SIMPLE_UNBIND(InputLayout, input_layout, vertex_elements, input_layout) 1833 IMPLEMENT_SIMPLE_UNBIND(PixelShader, shaders[D3D11_STAGE_PS], fs, shaders[D3D11_STAGE_PS]) 1834 IMPLEMENT_SIMPLE_UNBIND(VertexShader, shaders[D3D11_STAGE_VS], vs, shaders[D3D11_STAGE_VS]) 1835 IMPLEMENT_SIMPLE_UNBIND(GeometryShader, shaders[D3D11_STAGE_GS], gs, shaders[D3D11_STAGE_GS]) 1836 1837 void UnbindPredicate(ID3D11Predicate* predicate) 1838 { 1839 SYNCHRONIZED; 1840 if(predicate == render_predicate) 1841 { 1842 render_predicate.p = NULL; 1843 render_predicate_value = 0; 1844 pipe->render_condition(pipe, 0, 0); 1845 } 1846 } 1847 1848 void UnbindSamplerState(ID3D11SamplerState* state) 1849 { 1850 SYNCHRONIZED; 1851 for(unsigned s = 0; s < D3D11_STAGES; ++s) 1852 { 1853 for(unsigned i = 0; i < num_samplers[s]; ++i) 1854 { 1855 if(samplers[s][i] == state) 1856 { 1857 samplers[s][i].p = NULL; 1858 sampler_csos[s].v[i] = NULL; 1859 update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s)); 1860 } 1861 } 1862 } 1863 } 1864 1865 void UnbindBuffer(ID3D11Buffer* buffer) 1866 { 1867 SYNCHRONIZED; 1868 if(buffer == index_buffer) 1869 { 1870 index_buffer.p = 0; 1871 index_format = DXGI_FORMAT_UNKNOWN; 1872 index_offset = 0; 1873 struct pipe_index_buffer ib; 1874 memset(&ib, 0, sizeof(ib)); 1875 pipe->set_index_buffer(pipe, &ib); 1876 } 1877 1878 for(unsigned i = 0; i < num_vertex_buffers; ++i) 1879 { 1880 if(buffer == 
input_buffers[i]) 1881 { 1882 input_buffers[i].p = 0; 1883 memset(&vertex_buffers[num_vertex_buffers], 0, sizeof(vertex_buffers[num_vertex_buffers])); 1884 update_flags |= UPDATE_VERTEX_BUFFERS; 1885 } 1886 } 1887 1888 for(unsigned s = 0; s < D3D11_STAGES; ++s) 1889 { 1890 for(unsigned i = 0; i < sizeof(constant_buffers) / sizeof(constant_buffers[0]); ++i) 1891 { 1892 if(constant_buffers[s][i] == buffer) 1893 { 1894 constant_buffers[s][i] = (ID3D10Buffer*)NULL; 1895 pipe->set_constant_buffer(pipe, s, i, NULL); 1896 } 1897 } 1898 } 1899 } 1900 1901 void UnbindDepthStencilView(ID3D11DepthStencilView* view) 1902 { 1903 SYNCHRONIZED; 1904 if(view == depth_stencil_view) 1905 { 1906 depth_stencil_view.p = NULL; 1907 set_framebuffer(); 1908 } 1909 } 1910 1911 void UnbindRenderTargetView(ID3D11RenderTargetView* view) 1912 { 1913 SYNCHRONIZED; 1914 bool any_bound = false; 1915 for(unsigned i = 0; i < num_render_target_views; ++i) 1916 { 1917 if(render_target_views[i] == view) 1918 { 1919 render_target_views[i].p = NULL; 1920 any_bound = true; 1921 } 1922 } 1923 if(any_bound) 1924 set_framebuffer(); 1925 } 1926 1927 void UnbindShaderResourceView(ID3D11ShaderResourceView* view) 1928 { 1929 SYNCHRONIZED; 1930 for(unsigned s = 0; s < D3D11_STAGES; ++s) 1931 { 1932 for(unsigned i = 0; i < num_shader_resource_views[s]; ++i) 1933 { 1934 if(shader_resource_views[s][i] == view) 1935 { 1936 shader_resource_views[s][i].p = NULL; 1937 sampler_views[s][i] = NULL; 1938 update_flags |= (1 << (UPDATE_VIEWS_SHIFT + s)); 1939 } 1940 } 1941 } 1942 } 1943#endif 1944 1945#undef SYNCHRONIZED 1946}; 1947 1948#if API >= 11 1949/* This approach serves two purposes. 1950 * First, we don't want to do an atomic operation to manipulate the reference 1951 * count every time something is bound/unbound to the pipeline, since they are 1952 * expensive. 
 * Fortunately, the immediate context can only be used by a single thread, so
 * we don't have to use them, as long as a separate reference count is used
 * (see dual_refcnt_t).
 *
 * Second, we want to avoid the Device -> DeviceContext -> bound DeviceChild -> Device
 * garbage cycle.
 * To avoid it, DeviceChild doesn't hold a reference to Device as usual, but adds
 * one for each external reference count, while internal nonatomic_add_ref doesn't
 * add any.
 *
 * Note that ideally we would like to eliminate the non-atomic op too, but this is more
 * complicated, since we would either need to use garbage collection and give up
 * deterministic destruction (especially bad for large textures), or scan the whole
 * pipeline state every time the reference count of an object drops to 0, which risks
 * pathological slowdowns.
 *
 * Since this microoptimization should matter relatively little, let's avoid it for now.
 *
 * Note that deferred contexts don't use this, since as a whole, they must be thread-safe.
 * Eliminating the atomic ops for deferred contexts seems substantially harder.
 * This might be a problem if they are used in a one-shot multithreaded rendering
 * fashion, where SMP cacheline bouncing on the reference count may be visible.
 *
 * The idea would be to attach a structure of reference counts indexed by deferred
 * context id to each object. Ideally, this should be organized like ext2 block pointers.
 *
 * Every deferred context would get a reference count in its own cacheline.
 * The external count is protected by a lock bit, and there is also a "lock bit" in each
 * internal count.
 *
 * When the external count has to be dropped to 0, the lock bit is taken and all internal
 * reference counts are scanned, taking a count of them. A flag would also be set on them.
1985 * Deferred context manipulation would notice the flag, and update the count. 1986 * Once the count goes to zero, the object is freed. 1987 * 1988 * The problem of this is that if the external reference count ping-pongs between 1989 * zero and non-zero, the scans will take a lot of time. 1990 * 1991 * The idea to solve this is to compute the scans in a binary-tree like fashion, where 1992 * each binary tree node would have a "determined bit", which would be invalidated 1993 * by manipulations. 1994 * 1995 * However, all this complexity might actually be a loss in most cases, so let's just 1996 * stick to a single atomic refcnt for now. 1997 * 1998 * Also, we don't even support deferred contexts yet, so this can wait. 1999 */ 2000struct nonatomic_device_child_ptr_traits 2001{ 2002 static void add_ref(void* p) 2003 { 2004 if(p) 2005 ((GalliumD3D11DeviceChild<>*)p)->nonatomic_add_ref(); 2006 } 2007 2008 static void release(void* p) 2009 { 2010 if(p) 2011 ((GalliumD3D11DeviceChild<>*)p)->nonatomic_release(); 2012 } 2013}; 2014 2015struct GalliumD3D11ImmediateDeviceContext 2016 : public GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits> 2017{ 2018 GalliumD3D11ImmediateDeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, unsigned context_flags = 0) 2019 : GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>(device, pipe, context_flags) 2020 { 2021 // not necessary, but tests that the API at least basically works 2022 ClearState(); 2023 } 2024 2025 /* we do this since otherwise we would have a garbage cycle between this and the device */ 2026 virtual ULONG STDMETHODCALLTYPE AddRef() 2027 { 2028 return this->device->AddRef(); 2029 } 2030 2031 virtual ULONG STDMETHODCALLTYPE Release() 2032 { 2033 return this->device->Release(); 2034 } 2035 2036 virtual D3D11_DEVICE_CONTEXT_TYPE STDMETHODCALLTYPE GetType() 2037 { 2038 return D3D11_DEVICE_CONTEXT_IMMEDIATE; 2039 } 2040}; 2041 2042static ID3D11DeviceContext* 
GalliumD3D11ImmediateDeviceContext_Create(GalliumD3D11Screen* device, struct pipe_context* pipe, bool owns_pipe) 2043{ 2044 return new GalliumD3D11ImmediateDeviceContext(device, pipe, owns_pipe); 2045} 2046 2047static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumState(ID3D11DeviceContext* context) 2048{ 2049 ((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumState(); 2050} 2051 2052static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumStateBlitOnly(ID3D11DeviceContext* context) 2053{ 2054 ((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumStateBlitOnly(); 2055} 2056 2057static void GalliumD3D11ImmediateDeviceContext_Destroy(ID3D11DeviceContext* context) 2058{ 2059 delete (GalliumD3D11ImmediateDeviceContext*)context; 2060} 2061#endif 2062