d3d11_context.h revision 27b0c9d51300712f3b5d4af6cc20d53fcf9c8c46
1/**************************************************************************
2 *
3 * Copyright 2010 Luca Barbieri
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
/* All-null pointer array, used (per the original note) to unbind things; it has
 * 128 entries because shader resources need that many. */
static const void* zero_data[128];

/* Layout of the update_flags dirty mask:
 *  - bit (UPDATE_VIEWS_SHIFT + stage):    that stage's sampler views must be re-sent
 *  - bit (UPDATE_SAMPLERS_SHIFT + stage): that stage's sampler states must be re-sent
 *  - UPDATE_VERTEX_BUFFERS:               the vertex buffer array must be re-sent
 * Each per-stage group is D3D11_STAGES bits wide. */
#define UPDATE_VIEWS_SHIFT (D3D11_STAGES * 0)
#define UPDATE_SAMPLERS_SHIFT (D3D11_STAGES * 1)
#define UPDATE_VERTEX_BUFFERS (1 << (D3D11_STAGES * 2))
33
34#if API >= 11
35template<typename PtrTraits>
36struct GalliumD3D11DeviceContext :
37	public GalliumD3D11DeviceChild<ID3D11DeviceContext>
38{
39#else
40template<bool threadsafe>
41struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe>
42{
43	typedef simple_ptr_traits PtrTraits;
44	typedef GalliumD3D10Device GalliumD3D10DeviceContext;
45#endif
46
/* D3D objects currently bound by the application, one member per binding point.
 * The Set* entry points store the new object here and the Get* entry points hand
 * it back through .ref(). */
refcnt_ptr<GalliumD3D11Shader<>, PtrTraits> shaders[D3D11_STAGES];
refcnt_ptr<GalliumD3D11InputLayout, PtrTraits> input_layout;
refcnt_ptr<GalliumD3D11Buffer, PtrTraits> index_buffer;
refcnt_ptr<GalliumD3D11RasterizerState, PtrTraits> rasterizer_state;
refcnt_ptr<GalliumD3D11DepthStencilState, PtrTraits> depth_stencil_state;
refcnt_ptr<GalliumD3D11BlendState, PtrTraits> blend_state;
refcnt_ptr<GalliumD3D11DepthStencilView, PtrTraits> depth_stencil_view;
refcnt_ptr<GalliumD3D11Predicate, PtrTraits> render_predicate;

/* Bound objects for the array-style binding points; the per-stage arrays are
 * indexed [stage][slot]. */
refcnt_ptr<GalliumD3D11Buffer, PtrTraits> constant_buffers[D3D11_STAGES][D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT];
refcnt_ptr<GalliumD3D11ShaderResourceView, PtrTraits> shader_resource_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
refcnt_ptr<GalliumD3D11SamplerState, PtrTraits> samplers[D3D11_STAGES][D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
refcnt_ptr<GalliumD3D11Buffer, PtrTraits> input_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
refcnt_ptr<GalliumD3D11RenderTargetView, PtrTraits> render_target_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
refcnt_ptr<GalliumD3D11Buffer, PtrTraits> so_targets[D3D11_SO_BUFFER_SLOT_COUNT];

#if API >= 11
/* Unordered access views are only recorded (see CSSetUnorderedAccessViews);
 * nothing in the visible code forwards them to the pipe context. */
refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> cs_unordered_access_views[D3D11_PS_CS_UAV_REGISTER_COUNT];
refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> om_unordered_access_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
#endif

/* Plain-value pipeline state in D3D terms; reset to defaults by init_context(). */
D3D11_VIEWPORT viewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
D3D11_RECT scissor_rects[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
unsigned so_offsets[D3D11_SO_BUFFER_SLOT_COUNT];
D3D11_PRIMITIVE_TOPOLOGY primitive_topology;
DXGI_FORMAT index_format;
unsigned index_offset;
uint32_t strip_cut_index; // restart index for indexed draws; set_index_buffer() derives it from index_format
BOOL render_predicate_value;
float blend_color[4];
unsigned sample_mask;
unsigned stencil_ref;
bool depth_clamp; // taken from the bound rasterizer state in RSSetState(), sent via set_clip()

/* Driver state objects created in init_context() and bound in place of a null
 * D3D object (see IASetInputLayout, RSSetState, xs_set_samplers, xs_set_shader). */
void* default_input_layout;
void* default_rasterizer;
void* default_depth_stencil;
void* default_blend;
void* default_sampler;
void* default_shaders[D3D11_STAGES]; // only the vertex and fragment entries are created in init_context()

// derived state: the Gallium-side form of the bindings above, as handed to the pipe context
int primitive_mode; // d3d_to_pipe_prim[] translation of primitive_topology, 0 when out of range
struct pipe_vertex_buffer vertex_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
struct pipe_resource* so_buffers[D3D11_SO_BUFFER_SLOT_COUNT];
struct pipe_sampler_view* sampler_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
void* sampler_csos[D3D11_STAGES][D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
struct pipe_resource* buffers[D3D11_SO_BUFFER_SLOT_COUNT]; // NOTE(review): not referenced or initialized in the visible code
/* Number of leading slots in use for each array-style binding point. */
unsigned num_shader_resource_views[D3D11_STAGES];
unsigned num_samplers[D3D11_STAGES];
unsigned num_vertex_buffers;
unsigned num_render_target_views;
unsigned num_viewports;
unsigned num_scissor_rects;
unsigned num_so_targets;

struct pipe_context* pipe; // Gallium context every call is forwarded to
unsigned update_flags; // dirty bits (UPDATE_*), flushed by update_state() before a draw

bool owns_pipe; // when true, destroy_context() also destroys `pipe`
unsigned context_flags; // returned verbatim by GetContextFlags()

GalliumD3D11Caps caps; // copy of the screen caps, narrowed by init_context() to what `pipe` implements

/* Helper objects created in init_context() and released in destroy_context(). */
cso_context* cso_ctx;
gen_mipmap_state* gen_mipmap;
113
114#if API >= 11
115#define SYNCHRONIZED do {} while(0)
116
117	GalliumD3D11DeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, bool owns_pipe, unsigned context_flags = 0)
118	: GalliumD3D11DeviceChild<ID3D11DeviceContext>(device), pipe(pipe), owns_pipe(owns_pipe), context_flags(context_flags)
119	{
120		caps = device->screen_caps;
121		init_context();
122	}
123
124	~GalliumD3D11DeviceContext()
125	{
126		destroy_context();
127	}
128#else
129#define SYNCHRONIZED lock_t<maybe_mutex_t<threadsafe> > lock_(this->mutex)
130
131	GalliumD3D10Device(pipe_screen* screen, pipe_context* pipe, bool owns_pipe, unsigned creation_flags, IDXGIAdapter* adapter)
132	: GalliumD3D10ScreenImpl<threadsafe>(screen, pipe, owns_pipe, creation_flags, adapter), pipe(pipe), owns_pipe(owns_pipe), context_flags(0)
133	{
134		caps = this->screen_caps;
135		init_context();
136	}
137
138	~GalliumD3D10Device()
139	{
140		destroy_context();
141	}
142#endif
143
144	void init_context()
145	{
146		if(!pipe->begin_query)
147			caps.queries = false;
148		if(!pipe->bind_gs_state)
149		{
150			caps.gs = false;
151			caps.stages = 2;
152		}
153		if(!pipe->set_stream_output_buffers)
154			caps.so = false;
155		if(!pipe->set_geometry_sampler_views)
156			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_GEOMETRY);
157		if(!pipe->set_fragment_sampler_views)
158			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_FRAGMENT);
159		if(!pipe->set_vertex_sampler_views)
160			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_VERTEX);
161
162		update_flags = 0;
163
164		// pipeline state
165		memset(viewports, 0, sizeof(viewports));
166		memset(scissor_rects, 0, sizeof(scissor_rects));
167		memset(so_offsets, 0, sizeof(so_offsets));
168		primitive_topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
169		index_format = DXGI_FORMAT_UNKNOWN;
170		index_offset = 0;
171		strip_cut_index = 0xffffffff;
172		render_predicate_value = 0;
173		memset(blend_color, 0, sizeof(blend_color));
174		sample_mask = ~0;
175		stencil_ref = 0;
176		depth_clamp = 0;
177
178		// derived state
179		primitive_mode = 0;
180		memset(vertex_buffers, 0, sizeof(vertex_buffers));
181		memset(so_buffers, 0, sizeof(so_buffers));
182		memset(sampler_views, 0, sizeof(sampler_views));
183		memset(sampler_csos, 0, sizeof(sampler_csos));
184		memset(num_shader_resource_views, 0, sizeof(num_shader_resource_views));
185		memset(num_samplers, 0, sizeof(num_samplers));
186		num_vertex_buffers = 0;
187		num_render_target_views = 0;
188		num_viewports = 0;
189		num_scissor_rects = 0;
190		num_so_targets = 0;
191
192		default_input_layout = pipe->create_vertex_elements_state(pipe, 0, 0);
193
194		struct pipe_rasterizer_state rasterizerd;
195		memset(&rasterizerd, 0, sizeof(rasterizerd));
196		rasterizerd.gl_rasterization_rules = 1;
197		rasterizerd.cull_face = PIPE_FACE_BACK;
198		rasterizerd.flatshade_first = 1;
199		rasterizerd.line_width = 1.0f;
200		rasterizerd.point_size = 1.0f;
201		default_rasterizer = pipe->create_rasterizer_state(pipe, &rasterizerd);
202
203		struct pipe_depth_stencil_alpha_state depth_stencild;
204		memset(&depth_stencild, 0, sizeof(depth_stencild));
205		depth_stencild.depth.enabled = TRUE;
206		depth_stencild.depth.writemask = 1;
207		depth_stencild.depth.func = PIPE_FUNC_LESS;
208		default_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &depth_stencild);
209
210		struct pipe_blend_state blendd;
211		memset(&blendd, 0, sizeof(blendd));
212		blendd.rt[0].colormask = 0xf;
213		default_blend = pipe->create_blend_state(pipe, &blendd);
214
215		struct pipe_sampler_state samplerd;
216		memset(&samplerd, 0, sizeof(samplerd));
217		samplerd.normalized_coords = 1;
218		samplerd.min_img_filter = PIPE_TEX_FILTER_LINEAR;
219		samplerd.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
220		samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR;
221		samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
222		samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
223		samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
224		samplerd.border_color.f[0] = 1.0f;
225		samplerd.border_color.f[1] = 1.0f;
226		samplerd.border_color.f[2] = 1.0f;
227		samplerd.border_color.f[3] = 1.0f;
228		samplerd.min_lod = -FLT_MAX;
229		samplerd.max_lod = FLT_MAX;
230		samplerd.max_anisotropy = 1;
231		default_sampler = pipe->create_sampler_state(pipe, &samplerd);
232
233		memset(&samplerd, 0, sizeof(samplerd));
234		samplerd.normalized_coords = 0;
235		samplerd.min_img_filter = PIPE_TEX_FILTER_NEAREST;
236		samplerd.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
237		samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
238		samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
239		samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
240		samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
241		samplerd.min_lod = -FLT_MAX;
242		samplerd.max_lod = FLT_MAX;
243		samplerd.max_anisotropy = 1;
244
245		for(unsigned s = 0; s < D3D11_STAGES; ++s)
246			for(unsigned i = 0; i < D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; ++i)
247				sampler_csos[s][i] = default_sampler;
248
249		// TODO: should this really be empty shaders, or should they be all-passthrough?
250		memset(default_shaders, 0, sizeof(default_shaders));
251		struct ureg_program *ureg;
252		ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
253		ureg_END(ureg);
254		default_shaders[PIPE_SHADER_FRAGMENT] = ureg_create_shader_and_destroy(ureg, pipe);
255
256		ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
257		ureg_END(ureg);
258		default_shaders[PIPE_SHADER_VERTEX] = ureg_create_shader_and_destroy(ureg, pipe);
259
260		cso_ctx = cso_create_context(pipe);
261		gen_mipmap = util_create_gen_mipmap(pipe, cso_ctx);
262
263		RestoreGalliumState();
264	}
265
266	void destroy_context()
267	{
268		util_destroy_gen_mipmap(gen_mipmap);
269		cso_destroy_context(cso_ctx);
270
271		pipe->bind_vertex_elements_state(pipe, 0);
272		pipe->delete_vertex_elements_state(pipe, default_input_layout);
273
274		pipe->bind_rasterizer_state(pipe, 0);
275		pipe->delete_rasterizer_state(pipe, default_rasterizer);
276
277		pipe->bind_depth_stencil_alpha_state(pipe, 0);
278		pipe->delete_depth_stencil_alpha_state(pipe, default_depth_stencil);
279
280		pipe->bind_blend_state(pipe, 0);
281		pipe->delete_blend_state(pipe, default_blend);
282
283		pipe->bind_fragment_sampler_states(pipe, 0, 0);
284		pipe->bind_vertex_sampler_states(pipe, 0, 0);
285		if(pipe->bind_geometry_sampler_states)
286			pipe->bind_geometry_sampler_states(pipe, 0, 0);
287		pipe->delete_sampler_state(pipe, default_sampler);
288
289		pipe->bind_fs_state(pipe, 0);
290		pipe->delete_fs_state(pipe, default_shaders[PIPE_SHADER_FRAGMENT]);
291
292		pipe->bind_vs_state(pipe, 0);
293		pipe->delete_vs_state(pipe, default_shaders[PIPE_SHADER_VERTEX]);
294
295		if(owns_pipe)
296			pipe->destroy(pipe);
297	}
298
299	virtual unsigned STDMETHODCALLTYPE GetContextFlags(void)
300	{
301		return context_flags;
302	}
/* Extra trailing parameters of XSSetShader / XSGetShader.
 * For API >= 11 they add the class-instance array and its count (each macro
 * starts with a comma so it can be appended to a parameter list); for older
 * APIs they expand to nothing. */
#if API >= 11
#define SET_SHADER_EXTRA_ARGS , \
	ID3D11ClassInstance *const *ppClassInstances, \
	unsigned count
#define GET_SHADER_EXTRA_ARGS , \
		ID3D11ClassInstance **ppClassInstances, \
		unsigned *out_count
#else
#define SET_SHADER_EXTRA_ARGS
#define GET_SHADER_EXTRA_ARGS
#endif
314
315/* On Windows D3D11, SetConstantBuffers and SetShaderResources crash if passed a null pointer.
316 * Instead, you have to pass a pointer to nulls to unbind things.
317 * We do the same.
318 * TODO: is D3D10 the same?
319 */
320	template<unsigned s>
321	void xs_set_shader(GalliumD3D11Shader<>* shader)
322	{
323		if(shader != shaders[s].p)
324		{
325			shaders[s] = shader;
326			void* shader_cso = shader ? shader->object : default_shaders[s];
327			switch(s)
328			{
329			case PIPE_SHADER_VERTEX:
330				pipe->bind_vs_state(pipe, shader_cso);
331				break;
332			case PIPE_SHADER_FRAGMENT:
333				pipe->bind_fs_state(pipe, shader_cso);
334				break;
335			case PIPE_SHADER_GEOMETRY:
336				pipe->bind_gs_state(pipe, shader_cso);
337				break;
338			}
339			update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s)) | (1 << (UPDATE_VIEWS_SHIFT + s));
340		}
341	}
342
343	template<unsigned s>
344	void xs_set_constant_buffers(unsigned start, unsigned count, GalliumD3D11Buffer *const *constbufs)
345	{
346		for(unsigned i = 0; i < count; ++i)
347		{
348			if(constbufs[i] != constant_buffers[s][start + i].p)
349			{
350				constant_buffers[s][start + i] = constbufs[i];
351				if(s < caps.stages && start + i < caps.constant_buffers[s])
352					pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL);
353			}
354		}
355	}
356
357	template<unsigned s>
358	void xs_set_shader_resources(unsigned start, unsigned count, GalliumD3D11ShaderResourceView *const *srvs)
359	{
360		int last_different = -1;
361		for(unsigned i = 0; i < count; ++i)
362		{
363			if(shader_resource_views[s][start + i].p != srvs[i])
364			{
365				shader_resource_views[s][start + i] = srvs[i];
366				sampler_views[s][start + i] = srvs[i] ? srvs[i]->object : 0;
367				last_different = i;
368			}
369		}
370		if(last_different >= 0)
371		{
372			num_shader_resource_views[s] = std::max(num_shader_resource_views[s], start + last_different + 1);
373			update_flags |= 1 << (UPDATE_VIEWS_SHIFT + s);
374		}
375	}
376
377	template<unsigned s>
378	void xs_set_samplers(unsigned start, unsigned count, GalliumD3D11SamplerState *const *samps)
379	{
380		int last_different = -1;
381		for(unsigned i = 0; i < count; ++i)
382		{
383			if(samplers[s][start + i].p != samps[i])
384			{
385				samplers[s][start + i] = samps[i];
386				sampler_csos[s][start + i] = samps[i] ? samps[i]->object : default_sampler;
387				last_different = i;
388			}
389		}
390		if(last_different >= 0)
391		{
392			num_samplers[s] = std::max(num_samplers[s], start + last_different + 1);
393			update_flags |= 1 << (UPDATE_SAMPLERS_SHIFT + s);
394		}
395	}
396
/* Generates the eight per-stage D3D entry points for one shader stage.
 *
 * XS is the stage prefix (VS, GS, PS, ...) and Stage the matching interface
 * name fragment (Vertex, Geometry, Pixel, ...). Each setter takes the lock
 * via SYNCHRONIZED and forwards to the matching xs_set_* template with
 * D3D11_STAGE_<XS>. Each getter takes the lock and returns the recorded
 * bindings through .ref(). The extra class-instance arguments of
 * SetShader/GetShader are accepted but not used. */
#define IMPLEMENT_SHADER_STAGE(XS, Stage) \
	virtual void STDMETHODCALLTYPE XS##SetShader( \
		ID3D11##Stage##Shader *pShader \
		SET_SHADER_EXTRA_ARGS) \
	{ \
		SYNCHRONIZED; \
		GalliumD3D11Shader<>* const incoming = (GalliumD3D11Shader<>*)pShader; \
		xs_set_shader<D3D11_STAGE_##XS>(incoming); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetShader( \
		ID3D11##Stage##Shader **ppShader \
		GET_SHADER_EXTRA_ARGS) \
	{ \
		SYNCHRONIZED; \
		*ppShader = (ID3D11##Stage##Shader*)shaders[D3D11_STAGE_##XS].ref(); \
	} \
	virtual void STDMETHODCALLTYPE XS##SetConstantBuffers( \
		unsigned start, \
		unsigned count, \
		ID3D11Buffer *const* constant_buffers) \
	{ \
		SYNCHRONIZED; \
		GalliumD3D11Buffer *const *incoming = (GalliumD3D11Buffer *const *)constant_buffers; \
		xs_set_constant_buffers<D3D11_STAGE_##XS>(start, count, incoming); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetConstantBuffers( \
		unsigned start, \
		unsigned count, \
		ID3D11Buffer **out_constant_buffers) \
	{ \
		SYNCHRONIZED; \
		for(unsigned slot = 0; slot != count; ++slot) \
		{ \
			out_constant_buffers[slot] = constant_buffers[D3D11_STAGE_##XS][start + slot].ref(); \
		} \
	} \
	virtual void STDMETHODCALLTYPE XS##SetShaderResources( \
		unsigned start, \
		unsigned count, \
		ID3D11ShaderResourceView *const *new_shader_resource_views) \
	{ \
		SYNCHRONIZED; \
		GalliumD3D11ShaderResourceView *const *incoming = (GalliumD3D11ShaderResourceView *const *)new_shader_resource_views; \
		xs_set_shader_resources<D3D11_STAGE_##XS>(start, count, incoming); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetShaderResources( \
		unsigned start, \
		unsigned count, \
		ID3D11ShaderResourceView **out_shader_resource_views) \
	{ \
		SYNCHRONIZED; \
		for(unsigned slot = 0; slot != count; ++slot) \
		{ \
			out_shader_resource_views[slot] = shader_resource_views[D3D11_STAGE_##XS][start + slot].ref(); \
		} \
	} \
	virtual void STDMETHODCALLTYPE XS##SetSamplers( \
		unsigned start, \
		unsigned count, \
		ID3D11SamplerState *const *new_samplers) \
	{ \
		SYNCHRONIZED; \
		GalliumD3D11SamplerState *const *incoming = (GalliumD3D11SamplerState *const *)new_samplers; \
		xs_set_samplers<D3D11_STAGE_##XS>(start, count, incoming); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetSamplers( \
		unsigned start, \
		unsigned count, \
		ID3D11SamplerState **out_samplers) \
	{ \
		SYNCHRONIZED; \
		for(unsigned slot = 0; slot != count; ++slot) \
		{ \
			out_samplers[slot] = samplers[D3D11_STAGE_##XS][start + slot].ref(); \
		} \
	}
463
/* Per-stage statement wrappers: DO_VS and DO_PS expand to the statement
 * unchanged, DO_GS runs it only when caps.gs is set, and DO_HS, DO_DS and
 * DO_CS discard it. */
#define DO_VS(x) x
#define DO_GS(x) do {if(caps.gs) {x;}} while(0)
#define DO_PS(x) x
#define DO_HS(x)
#define DO_DS(x)
#define DO_CS(x)
470	IMPLEMENT_SHADER_STAGE(VS, Vertex)
471	IMPLEMENT_SHADER_STAGE(GS, Geometry)
472	IMPLEMENT_SHADER_STAGE(PS, Pixel)
473
474#if API >= 11
475	IMPLEMENT_SHADER_STAGE(HS, Hull)
476	IMPLEMENT_SHADER_STAGE(DS, Domain)
477	IMPLEMENT_SHADER_STAGE(CS, Compute)
478
479	virtual void STDMETHODCALLTYPE CSSetUnorderedAccessViews(
480		unsigned start,
481		unsigned count,
482		ID3D11UnorderedAccessView *const *new_unordered_access_views,
483		const unsigned *new_uav_initial_counts)
484	{
485		SYNCHRONIZED;
486		for(unsigned i = 0; i < count; ++i)
487			cs_unordered_access_views[start + i] = new_unordered_access_views[i];
488	}
489
490	virtual void STDMETHODCALLTYPE CSGetUnorderedAccessViews(
491		unsigned start,
492		unsigned count,
493		ID3D11UnorderedAccessView **out_unordered_access_views)
494	{
495		SYNCHRONIZED;
496		for(unsigned i = 0; i < count; ++i)
497			out_unordered_access_views[i] = cs_unordered_access_views[start + i].ref();
498	}
499#endif
500
501	template<unsigned s>
502	void update_stage()
503	{
504		if(update_flags & (1 << (UPDATE_VIEWS_SHIFT + s)))
505		{
506			while(num_shader_resource_views[s] && !sampler_views[s][num_shader_resource_views[s] - 1]) \
507				--num_shader_resource_views[s];
508			if((1 << s) & caps.stages_with_sampling)
509			{
510				const unsigned num_views_to_bind = num_shader_resource_views[s];
511				switch(s)
512				{
513				case PIPE_SHADER_VERTEX:
514					pipe->set_vertex_sampler_views(pipe, num_views_to_bind, sampler_views[s]);
515					break;
516				case PIPE_SHADER_FRAGMENT:
517					pipe->set_fragment_sampler_views(pipe, num_views_to_bind, sampler_views[s]);
518					break;
519				case PIPE_SHADER_GEOMETRY:
520					pipe->set_geometry_sampler_views(pipe, num_views_to_bind, sampler_views[s]);
521					break;
522				}
523			}
524		}
525
526		if(update_flags & (1 << (UPDATE_SAMPLERS_SHIFT + s)))
527		{
528			while(num_samplers[s] && !sampler_csos[s][num_samplers[s] - 1])
529				--num_samplers[s];
530			if((1 << s) & caps.stages_with_sampling)
531			{
532				const unsigned num_samplers_to_bind = num_samplers[s];
533				switch(s)
534				{
535				case PIPE_SHADER_VERTEX:
536					pipe->bind_vertex_sampler_states(pipe, num_samplers_to_bind, sampler_csos[s]);
537					break;
538				case PIPE_SHADER_FRAGMENT:
539					pipe->bind_fragment_sampler_states(pipe, num_samplers_to_bind, sampler_csos[s]);
540					break;
541				case PIPE_SHADER_GEOMETRY:
542					pipe->bind_geometry_sampler_states(pipe, num_samplers_to_bind, sampler_csos[s]);
543					break;
544				}
545			}
546		}
547	}
548
549	void update_state()
550	{
551		update_stage<D3D11_STAGE_PS>();
552		update_stage<D3D11_STAGE_VS>();
553		update_stage<D3D11_STAGE_GS>();
554#if API >= 11
555		update_stage<D3D11_STAGE_HS>();
556		update_stage<D3D11_STAGE_DS>();
557		update_stage<D3D11_STAGE_CS>();
558#endif
559
560		if(update_flags & UPDATE_VERTEX_BUFFERS)
561		{
562			while(num_vertex_buffers && !vertex_buffers[num_vertex_buffers - 1].buffer)
563				--num_vertex_buffers;
564			pipe->set_vertex_buffers(pipe, num_vertex_buffers, vertex_buffers);
565		}
566
567		update_flags = 0;
568	}
569
570	virtual void STDMETHODCALLTYPE IASetInputLayout(
571		ID3D11InputLayout *new_input_layout)
572	{
573		SYNCHRONIZED;
574		if(new_input_layout != input_layout.p)
575		{
576			input_layout = new_input_layout;
577			pipe->bind_vertex_elements_state(pipe, new_input_layout ? ((GalliumD3D11InputLayout*)new_input_layout)->object : default_input_layout);
578		}
579	}
580
581	virtual void STDMETHODCALLTYPE IAGetInputLayout(
582		ID3D11InputLayout **out_input_layout)
583	{
584		SYNCHRONIZED;
585		*out_input_layout = input_layout.ref();
586	}
587
588	virtual void STDMETHODCALLTYPE IASetVertexBuffers(
589		unsigned start,
590		unsigned count,
591		ID3D11Buffer *const *new_vertex_buffers,
592		const unsigned *new_strides,
593		const unsigned *new_offsets)
594	{
595		SYNCHRONIZED;
596		int last_different = -1;
597		for(unsigned i = 0; i < count; ++i)
598		{
599			ID3D11Buffer* buffer = new_vertex_buffers[i];
600			if(buffer != input_buffers[start + i].p
601				|| vertex_buffers[start + i].buffer_offset != new_offsets[i]
602				|| vertex_buffers[start + i].stride != new_strides[i]
603			)
604			{
605				input_buffers[start + i] = buffer;
606				vertex_buffers[start + i].buffer = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
607				vertex_buffers[start + i].buffer_offset = new_offsets[i];
608				vertex_buffers[start + i].stride = new_strides[i];
609				last_different = i;
610			}
611		}
612		if(last_different >= 0)
613		{
614			num_vertex_buffers = std::max(num_vertex_buffers, start + count);
615			update_flags |= UPDATE_VERTEX_BUFFERS;
616		}
617	}
618
619	virtual void STDMETHODCALLTYPE IAGetVertexBuffers(
620		unsigned start,
621		unsigned count,
622		ID3D11Buffer **out_vertex_buffers,
623		unsigned *out_strides,
624		unsigned *out_offsets)
625	{
626		SYNCHRONIZED;
627		if(out_vertex_buffers)
628		{
629			for(unsigned i = 0; i < count; ++i)
630				out_vertex_buffers[i] = input_buffers[start + i].ref();
631		}
632
633		if(out_offsets)
634		{
635			for(unsigned i = 0; i < count; ++i)
636				out_offsets[i] = vertex_buffers[start + i].buffer_offset;
637		}
638
639		if(out_strides)
640		{
641			for(unsigned i = 0; i < count; ++i)
642				out_strides[i] = vertex_buffers[start + i].stride;
643		}
644	}
645
646	void set_index_buffer()
647	{
648		pipe_index_buffer ib;
649		if(!index_buffer)
650		{
651			memset(&ib, 0, sizeof(ib));
652		}
653		else
654		{
655			switch(index_format) {
656			case DXGI_FORMAT_R32_UINT:
657				ib.index_size = 4;
658				strip_cut_index = 0xffffffff;
659				break;
660			case DXGI_FORMAT_R16_UINT:
661				ib.index_size = 2;
662				strip_cut_index = 0xffff;
663				break;
664			default:
665				ib.index_size = 1;
666				strip_cut_index = 0xff;
667				break;
668			}
669			ib.offset = index_offset;
670			ib.buffer = index_buffer ? ((GalliumD3D11Buffer*)index_buffer.p)->resource : 0;
671		}
672		pipe->set_index_buffer(pipe, &ib);
673	}
674
675	virtual void STDMETHODCALLTYPE IASetIndexBuffer(
676		ID3D11Buffer *new_index_buffer,
677		DXGI_FORMAT new_index_format,
678		unsigned new_index_offset)
679	{
680		SYNCHRONIZED;
681		if(index_buffer.p != new_index_buffer || index_format != new_index_format || index_offset != new_index_offset)
682		{
683			index_buffer = new_index_buffer;
684			index_format = new_index_format;
685			index_offset = new_index_offset;
686
687			set_index_buffer();
688		}
689	}
690
691	virtual void STDMETHODCALLTYPE IAGetIndexBuffer(
692		ID3D11Buffer **out_index_buffer,
693		DXGI_FORMAT *out_index_format,
694		unsigned *out_index_offset)
695	{
696		SYNCHRONIZED;
697		if(out_index_buffer)
698			*out_index_buffer = index_buffer.ref();
699		if(out_index_format)
700			*out_index_format = index_format;
701		if(out_index_offset)
702			*out_index_offset = index_offset;
703	}
704
705	virtual void STDMETHODCALLTYPE IASetPrimitiveTopology(
706		D3D11_PRIMITIVE_TOPOLOGY new_primitive_topology)
707	{
708		SYNCHRONIZED;
709		if(primitive_topology != new_primitive_topology)
710		{
711			if(new_primitive_topology < D3D_PRIMITIVE_TOPOLOGY_COUNT)
712				primitive_mode = d3d_to_pipe_prim[new_primitive_topology];
713			else
714				primitive_mode = 0;
715			primitive_topology = new_primitive_topology;
716		}
717	}
718
719	virtual void STDMETHODCALLTYPE IAGetPrimitiveTopology(
720		D3D11_PRIMITIVE_TOPOLOGY *out_primitive_topology)
721	{
722		SYNCHRONIZED;
723		*out_primitive_topology = primitive_topology;
724	}
725
726	virtual void STDMETHODCALLTYPE DrawIndexed(
727		unsigned index_count,
728		unsigned start_index_location,
729		int base_vertex_location)
730	{
731		SYNCHRONIZED;
732		if(update_flags)
733			update_state();
734
735		pipe_draw_info info;
736		info.mode = primitive_mode;
737		info.indexed = TRUE;
738		info.count = index_count;
739		info.start = start_index_location;
740		info.index_bias = base_vertex_location;
741		info.min_index = 0;
742		info.max_index = ~0;
743		info.start_instance = 0;
744		info.instance_count = 1;
745		info.primitive_restart = TRUE;
746		info.restart_index = strip_cut_index;
747
748		pipe->draw_vbo(pipe, &info);
749	}
750
751	virtual void STDMETHODCALLTYPE Draw(
752		unsigned vertex_count,
753		unsigned start_vertex_location)
754	{
755		SYNCHRONIZED;
756		if(update_flags)
757			update_state();
758
759		pipe_draw_info info;
760		info.mode = primitive_mode;
761		info.indexed = FALSE;
762		info.count = vertex_count;
763		info.start = start_vertex_location;
764		info.index_bias = 0;
765		info.min_index = 0;
766		info.max_index = ~0;
767		info.start_instance = 0;
768		info.instance_count = 1;
769		info.primitive_restart = FALSE;
770
771		pipe->draw_vbo(pipe, &info);
772	}
773
774	virtual void STDMETHODCALLTYPE DrawIndexedInstanced(
775		unsigned index_countPerInstance,
776		unsigned instance_count,
777		unsigned start_index_location,
778		int base_vertex_location,
779		unsigned start_instance_location)
780	{
781		SYNCHRONIZED;
782		if(update_flags)
783			update_state();
784
785		pipe_draw_info info;
786		info.mode = primitive_mode;
787		info.indexed = TRUE;
788		info.count = index_countPerInstance;
789		info.start = start_index_location;
790		info.index_bias = base_vertex_location;
791		info.min_index = 0;
792		info.max_index = ~0;
793		info.start_instance = start_instance_location;
794		info.instance_count = instance_count;
795		info.primitive_restart = TRUE;
796		info.restart_index = strip_cut_index;
797
798		pipe->draw_vbo(pipe, &info);
799	}
800
801	virtual void STDMETHODCALLTYPE DrawInstanced(
802		unsigned vertex_countPerInstance,
803		unsigned instance_count,
804		unsigned start_vertex_location,
805		unsigned start_instance_location)
806	{
807		SYNCHRONIZED;
808		if(update_flags)
809			update_state();
810
811		pipe_draw_info info;
812		info.mode = primitive_mode;
813		info.indexed = FALSE;
814		info.count = vertex_countPerInstance;
815		info.start = start_vertex_location;
816		info.index_bias = 0;
817		info.min_index = 0;
818		info.max_index = ~0;
819		info.start_instance = start_instance_location;
820		info.instance_count = instance_count;
821		info.primitive_restart = FALSE;
822
823		pipe->draw_vbo(pipe, &info);
824	}
825
826	virtual void STDMETHODCALLTYPE DrawAuto(void)
827	{
828		if(!caps.so)
829			return;
830
831		SYNCHRONIZED;
832		if(update_flags)
833			update_state();
834
835		pipe->draw_stream_output(pipe, primitive_mode);
836	}
837
838	virtual void STDMETHODCALLTYPE DrawIndexedInstancedIndirect(
839		ID3D11Buffer *buffer,
840		unsigned aligned_byte_offset)
841	{
842		SYNCHRONIZED;
843		if(update_flags)
844			update_state();
845
846		struct {
847			unsigned count;
848			unsigned instance_count;
849			unsigned start;
850			unsigned index_bias;
851		} data;
852
853		pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data);
854
855		pipe_draw_info info;
856		info.mode = primitive_mode;
857		info.indexed = TRUE;
858		info.start = data.start;
859		info.count = data.count;
860		info.index_bias = data.index_bias;
861		info.min_index = 0;
862		info.max_index = ~0;
863		info.start_instance = 0;
864		info.instance_count = data.instance_count;
865		info.primitive_restart = TRUE;
866		info.restart_index = strip_cut_index;
867
868		pipe->draw_vbo(pipe, &info);
869	}
870
871	virtual void STDMETHODCALLTYPE DrawInstancedIndirect(
872		ID3D11Buffer *buffer,
873		unsigned aligned_byte_offset)
874	{
875		SYNCHRONIZED;
876		if(update_flags)
877			update_state();
878
879		struct {
880			unsigned count;
881			unsigned instance_count;
882			unsigned start;
883		} data;
884
885		pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data);
886
887		pipe_draw_info info;
888		info.mode = primitive_mode;
889		info.indexed = FALSE;
890		info.start = data.start;
891		info.count = data.count;
892		info.index_bias = 0;
893		info.min_index = 0;
894		info.max_index = ~0;
895		info.start_instance = 0;
896		info.instance_count = data.instance_count;
897		info.primitive_restart = FALSE;
898
899		pipe->draw_vbo(pipe, &info);
900	}
901
902#if API >= 11
903	virtual void STDMETHODCALLTYPE Dispatch(
904		unsigned thread_group_count_x,
905		unsigned thread_group_count_y,
906		unsigned thread_group_count_z)
907	{
908// uncomment this when this is implemented
909//		SYNCHRONIZED;
910//		if(update_flags)
911//			update_state();
912	}
913
914	virtual void STDMETHODCALLTYPE DispatchIndirect(
915		ID3D11Buffer *buffer,
916		unsigned aligned_byte_offset)
917	{
918// uncomment this when this is implemented
919//		SYNCHRONIZED;
920//		if(update_flags)
921//			update_state();
922	}
923#endif
924
925	void set_clip()
926	{
927		pipe_clip_state clip;
928		clip.nr = 0;
929		clip.depth_clamp = depth_clamp;
930		pipe->set_clip_state(pipe, &clip);
931	}
932
933	virtual void STDMETHODCALLTYPE RSSetState(
934		ID3D11RasterizerState *new_rasterizer_state)
935	{
936		SYNCHRONIZED;
937		if(new_rasterizer_state != rasterizer_state.p)
938		{
939			rasterizer_state = new_rasterizer_state;
940			pipe->bind_rasterizer_state(pipe, new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->object : default_rasterizer);
941			bool new_depth_clamp = new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->depth_clamp : false;
942			if(depth_clamp != new_depth_clamp)
943			{
944				depth_clamp = new_depth_clamp;
945				set_clip();
946			}
947		}
948	}
949
950	virtual void STDMETHODCALLTYPE RSGetState(
951		ID3D11RasterizerState **out_rasterizer_state)
952	{
953		SYNCHRONIZED;
954		*out_rasterizer_state = rasterizer_state.ref();
955	}
956
957	void set_viewport()
958	{
959		// TODO: is depth correct? it seems D3D10/11 uses a [-1,1]x[-1,1]x[0,1] cube
960		pipe_viewport_state viewport;
961		float half_width = viewports[0].Width * 0.5f;
962		float half_height = viewports[0].Height * 0.5f;
963
964		viewport.scale[0] = half_width;
965		viewport.scale[1] = -half_height;
966		viewport.scale[2] = (viewports[0].MaxDepth - viewports[0].MinDepth);
967		viewport.scale[3] = 1.0f;
968		viewport.translate[0] = half_width + viewports[0].TopLeftX;
969		viewport.translate[1] = half_height + viewports[0].TopLeftY;
970		viewport.translate[2] = viewports[0].MinDepth;
971		viewport.translate[3] = 1.0f;
972		pipe->set_viewport_state(pipe, &viewport);
973	}
974
975	virtual void STDMETHODCALLTYPE RSSetViewports(
976		unsigned count,
977		const D3D11_VIEWPORT *new_viewports)
978	{
979		SYNCHRONIZED;
980		if(count)
981		{
982			if(memcmp(&viewports[0], &new_viewports[0], sizeof(viewports[0])))
983			{
984				viewports[0] = new_viewports[0];
985				set_viewport();
986			}
987			for(unsigned i = 1; i < count; ++i)
988				viewports[i] = new_viewports[i];
989		}
990		else if(num_viewports)
991		{
992			// TODO: what should we do here?
993			memset(&viewports[0], 0, sizeof(viewports[0]));
994			set_viewport();
995		}
996		num_viewports = count;
997	}
998
999	virtual void STDMETHODCALLTYPE RSGetViewports(
1000		unsigned *out_count,
1001		D3D11_VIEWPORT *out_viewports)
1002	{
1003		SYNCHRONIZED;
1004		if(out_viewports)
1005		{
1006			unsigned i;
1007			for(i = 0; i < std::min(*out_count, num_viewports); ++i)
1008				out_viewports[i] = viewports[i];
1009
1010			memset(out_viewports + i, 0, (*out_count - i) * sizeof(D3D11_VIEWPORT));
1011		}
1012
1013		*out_count = num_viewports;
1014	}
1015
1016	void set_scissor()
1017	{
1018		pipe_scissor_state scissor;
1019		scissor.minx = scissor_rects[0].left;
1020		scissor.miny = scissor_rects[0].top;
1021		scissor.maxx = scissor_rects[0].right;
1022		scissor.maxy = scissor_rects[0].bottom;
1023		pipe->set_scissor_state(pipe, &scissor);
1024	}
1025
1026	virtual void STDMETHODCALLTYPE RSSetScissorRects(
1027		unsigned count,
1028		const D3D11_RECT *new_rects)
1029	{
1030		SYNCHRONIZED;
1031		if(count)
1032		{
1033			if(memcmp(&scissor_rects[0], &new_rects[0], sizeof(scissor_rects[0])))
1034			{
1035				scissor_rects[0] = new_rects[0];
1036				set_scissor();
1037			}
1038			for(unsigned i = 1; i < count; ++i)
1039				scissor_rects[i] = new_rects[i];
1040		}
1041		else if(num_scissor_rects)
1042		{
1043			// TODO: what should we do here?
1044			memset(&scissor_rects[0], 0, sizeof(scissor_rects[0]));
1045			set_scissor();
1046		}
1047
1048		num_scissor_rects = count;
1049	}
1050
1051	virtual void STDMETHODCALLTYPE RSGetScissorRects(
1052		unsigned *out_count,
1053		D3D11_RECT *out_rects)
1054	{
1055		SYNCHRONIZED;
1056		if(out_rects)
1057		{
1058			unsigned i;
1059			for(i = 0; i < std::min(*out_count, num_scissor_rects); ++i)
1060				out_rects[i] = scissor_rects[i];
1061
1062			memset(out_rects + i, 0, (*out_count - i) * sizeof(D3D11_RECT));
1063		}
1064
1065		*out_count = num_scissor_rects;
1066	}
1067
1068	virtual void STDMETHODCALLTYPE OMSetBlendState(
1069		ID3D11BlendState *new_blend_state,
1070		const float new_blend_factor[4],
1071		unsigned new_sample_mask)
1072	{
1073		SYNCHRONIZED;
1074		float white[4] = {1.0f, 1.0f, 1.0f, 1.0f};
1075
1076		if(blend_state.p != new_blend_state)
1077		{
1078			pipe->bind_blend_state(pipe, new_blend_state ? ((GalliumD3D11BlendState*)new_blend_state)->object : default_blend);
1079			blend_state = new_blend_state;
1080		}
1081
1082		// Windows D3D11 does this, even though it's apparently undocumented
1083		if(!new_blend_factor)
1084			new_blend_factor = white;
1085
1086		if(memcmp(blend_color, new_blend_factor, sizeof(blend_color)))
1087		{
1088			pipe->set_blend_color(pipe, (struct pipe_blend_color*)new_blend_factor);
1089			memcpy(blend_color, new_blend_factor, sizeof(blend_color));
1090		}
1091
1092		if(sample_mask != new_sample_mask)
1093		{
1094			pipe->set_sample_mask(pipe, new_sample_mask);
1095			sample_mask = new_sample_mask;
1096		}
1097	}
1098
1099	virtual void STDMETHODCALLTYPE OMGetBlendState(
1100		ID3D11BlendState **out_blend_state,
1101		float out_blend_factor[4],
1102		unsigned *out_sample_mask)
1103	{
1104		SYNCHRONIZED;
1105		if(out_blend_state)
1106			*out_blend_state = blend_state.ref();
1107		if(out_blend_factor)
1108			memcpy(out_blend_factor, blend_color, sizeof(blend_color));
1109		if(out_sample_mask)
1110			*out_sample_mask = sample_mask;
1111	}
1112
1113	void set_stencil_ref()
1114	{
1115		struct pipe_stencil_ref sref;
1116		sref.ref_value[0] = stencil_ref;
1117		sref.ref_value[1] = stencil_ref;
1118		pipe->set_stencil_ref(pipe, &sref);
1119	}
1120
1121	virtual void STDMETHODCALLTYPE OMSetDepthStencilState(
1122		ID3D11DepthStencilState *new_depth_stencil_state,
1123		unsigned new_stencil_ref)
1124	{
1125		SYNCHRONIZED;
1126		if(new_depth_stencil_state != depth_stencil_state.p)
1127		{
1128			pipe->bind_depth_stencil_alpha_state(pipe, new_depth_stencil_state ? ((GalliumD3D11DepthStencilState*)new_depth_stencil_state)->object : default_depth_stencil);
1129			depth_stencil_state = new_depth_stencil_state;
1130		}
1131
1132		if(new_stencil_ref != stencil_ref)
1133		{
1134			stencil_ref = new_stencil_ref;
1135			set_stencil_ref();
1136		}
1137	}
1138
1139	virtual void STDMETHODCALLTYPE OMGetDepthStencilState(
1140		ID3D11DepthStencilState **out_depth_stencil_state,
1141		unsigned *out_stencil_ref)
1142	{
1143		SYNCHRONIZED;
1144		if(*out_depth_stencil_state)
1145			*out_depth_stencil_state = depth_stencil_state.ref();
1146		if(out_stencil_ref)
1147			*out_stencil_ref = stencil_ref;
1148	}
1149
1150	void set_framebuffer()
1151	{
1152		struct pipe_framebuffer_state fb;
1153		memset(&fb, 0, sizeof(fb));
1154		if(depth_stencil_view)
1155		{
1156			struct pipe_surface* surf = ((GalliumD3D11DepthStencilView*)depth_stencil_view.p)->object;
1157			fb.zsbuf = surf;
1158			if(surf->width > fb.width)
1159				fb.width = surf->width;
1160			if(surf->height > fb.height)
1161				fb.height = surf->height;
1162		}
1163		fb.nr_cbufs = num_render_target_views;
1164		unsigned i;
1165		for(i = 0; i < num_render_target_views; ++i)
1166		{
1167			if(render_target_views[i])
1168			{
1169				struct pipe_surface* surf = ((GalliumD3D11RenderTargetView*)render_target_views[i].p)->object;
1170				fb.cbufs[i] = surf;
1171				if(surf->width > fb.width)
1172					fb.width = surf->width;
1173				if(surf->height > fb.height)
1174					fb.height = surf->height;
1175			}
1176		}
1177
1178		pipe->set_framebuffer_state(pipe, &fb);
1179	}
1180
1181	/* TODO: the docs say that we should unbind conflicting resources (e.g. those bound for read while we are binding them for write too), but we aren't.
1182	 * Hopefully nobody relies on this happening
1183	 */
1184
1185	virtual void STDMETHODCALLTYPE OMSetRenderTargets(
1186		unsigned count,
1187		ID3D11RenderTargetView *const *new_render_target_views,
1188		ID3D11DepthStencilView  *new_depth_stencil_view)
1189	{
1190		SYNCHRONIZED;
1191
1192		bool update = false;
1193		unsigned i, num;
1194
1195		if(depth_stencil_view.p != new_depth_stencil_view) {
1196			update = true;
1197			depth_stencil_view = new_depth_stencil_view;
1198		}
1199
1200		if(!new_render_target_views)
1201			count = 0;
1202
1203		for(num = 0, i = 0; i < count; ++i) {
1204#if API >= 11
1205			// XXX: is unbinding the UAVs here correct ?
1206			om_unordered_access_views[i] = (ID3D11UnorderedAccessView*)NULL;
1207#endif
1208			if(new_render_target_views[i] != render_target_views[i].p) {
1209				update = true;
1210				render_target_views[i] = new_render_target_views[i];
1211			}
1212			if(new_render_target_views[i])
1213				num = i + 1;
1214		}
1215		if(num != num_render_target_views) {
1216			update = true;
1217			for(; i < num_render_target_views; ++i)
1218				render_target_views[i] = (ID3D11RenderTargetView*)NULL;
1219		}
1220		num_render_target_views = num;
1221		if(update)
1222			set_framebuffer();
1223	}
1224
1225	virtual void STDMETHODCALLTYPE OMGetRenderTargets(
1226		unsigned count,
1227		ID3D11RenderTargetView **out_render_target_views,
1228		ID3D11DepthStencilView  **out_depth_stencil_view)
1229	{
1230		SYNCHRONIZED;
1231		if(out_render_target_views)
1232		{
1233			unsigned i;
1234			for(i = 0; i < std::min(num_render_target_views, count); ++i)
1235				out_render_target_views[i] = render_target_views[i].ref();
1236
1237			for(; i < count; ++i)
1238				out_render_target_views[i] = 0;
1239		}
1240
1241		if(out_depth_stencil_view)
1242			*out_depth_stencil_view = depth_stencil_view.ref();
1243	}
1244
1245#if API >= 11
1246	/* TODO: what is this supposed to do _exactly_? are we doing the right thing? */
1247	virtual void STDMETHODCALLTYPE OMSetRenderTargetsAndUnorderedAccessViews(
1248		unsigned rtv_count,
1249		ID3D11RenderTargetView *const *new_render_target_views,
1250		ID3D11DepthStencilView  *new_depth_stencil_view,
1251		unsigned uav_start,
1252		unsigned uav_count,
1253		ID3D11UnorderedAccessView *const *new_unordered_access_views,
1254		const unsigned *new_uav_initial_counts)
1255	{
1256		SYNCHRONIZED;
1257		if(rtv_count != D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL)
1258			OMSetRenderTargets(rtv_count, new_render_target_views, new_depth_stencil_view);
1259
1260		if(uav_count != D3D11_KEEP_UNORDERED_ACCESS_VIEWS)
1261		{
1262			for(unsigned i = 0; i < uav_count; ++i)
1263			{
1264				om_unordered_access_views[uav_start + i] = new_unordered_access_views[i];
1265				render_target_views[uav_start + i] = (ID3D11RenderTargetView*)0;
1266			}
1267		}
1268	}
1269
1270	virtual void STDMETHODCALLTYPE OMGetRenderTargetsAndUnorderedAccessViews(
1271		unsigned rtv_count,
1272		ID3D11RenderTargetView **out_render_target_views,
1273		ID3D11DepthStencilView  **out_depth_stencil_view,
1274		unsigned uav_start,
1275		unsigned uav_count,
1276		ID3D11UnorderedAccessView **out_unordered_access_views)
1277	{
1278		SYNCHRONIZED;
1279		if(out_render_target_views)
1280			OMGetRenderTargets(rtv_count, out_render_target_views, out_depth_stencil_view);
1281
1282		if(out_unordered_access_views)
1283		{
1284			for(unsigned i = 0; i < uav_count; ++i)
1285				out_unordered_access_views[i] = om_unordered_access_views[uav_start + i].ref();
1286		}
1287	}
1288#endif
1289
1290	virtual void STDMETHODCALLTYPE SOSetTargets(
1291		unsigned count,
1292		ID3D11Buffer *const *new_so_targets,
1293		const unsigned *new_offsets)
1294	{
1295		SYNCHRONIZED;
1296		unsigned i;
1297		if(!new_so_targets)
1298			count = 0;
1299		bool changed = false;
1300		for(i = 0; i < count; ++i)
1301		{
1302			ID3D11Buffer* buffer = new_so_targets[i];
1303			if(buffer != so_targets[i].p || new_offsets[i] != so_offsets[i])
1304			{
1305				so_buffers[i] = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
1306				so_targets[i] = buffer;
1307				so_offsets[i] = new_offsets[i];
1308				changed = true;
1309			}
1310		}
1311		for(; i < D3D11_SO_BUFFER_SLOT_COUNT; ++i)
1312		{
1313			if(so_targets[i].p || so_offsets[i])
1314			{
1315				changed = true;
1316				so_targets[i] = (ID3D11Buffer*)0;
1317				so_offsets[i] = 0;
1318			}
1319		}
1320		num_so_targets = count;
1321
1322		if(changed && caps.so)
1323			pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1324	}
1325
1326	virtual void STDMETHODCALLTYPE SOGetTargets(
1327		unsigned count,
1328		ID3D11Buffer **out_so_targets
1329#if API < 11
1330		, UINT *out_offsets
1331#endif
1332		)
1333	{
1334		SYNCHRONIZED;
1335		for(unsigned i = 0; i < count; ++i)
1336		{
1337			out_so_targets[i] = so_targets[i].ref();
1338#if API < 11
1339			out_offsets[i] = so_offsets[i];
1340#endif
1341		}
1342	}
1343
1344	virtual void STDMETHODCALLTYPE Begin(
1345		ID3D11Asynchronous *async)
1346	{
1347		SYNCHRONIZED;
1348		if(caps.queries)
1349			pipe->begin_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query);
1350	}
1351
1352	virtual void STDMETHODCALLTYPE End(
1353		ID3D11Asynchronous *async)
1354	{
1355		SYNCHRONIZED;
1356		if(caps.queries)
1357			pipe->end_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query);
1358	}
1359
1360	virtual HRESULT STDMETHODCALLTYPE GetData(
1361		ID3D11Asynchronous *iasync,
1362		void *out_data,
1363		unsigned data_size,
1364		unsigned get_data_flags)
1365	{
1366		SYNCHRONIZED;
1367		if(!caps.queries)
1368			return E_NOTIMPL;
1369
1370		GalliumD3D11Asynchronous<>* async = (GalliumD3D11Asynchronous<>*)iasync;
1371		void* tmp_data = alloca(async->data_size);
1372		boolean ret = pipe->get_query_result(pipe, async->query, !(get_data_flags & D3D11_ASYNC_GETDATA_DONOTFLUSH), tmp_data);
1373		if(out_data)
1374			memcpy(out_data, tmp_data, std::min(async->data_size, data_size));
1375		return ret ? S_OK : S_FALSE;
1376	}
1377
1378	void set_render_condition()
1379	{
1380		if(caps.render_condition)
1381		{
1382			if(!render_predicate)
1383				pipe->render_condition(pipe, 0, 0);
1384			else
1385			{
1386				GalliumD3D11Predicate* predicate = (GalliumD3D11Predicate*)render_predicate.p;
1387				if(!render_predicate_value && predicate->desc.Query == D3D11_QUERY_OCCLUSION_PREDICATE)
1388				{
1389					unsigned mode = (predicate->desc.MiscFlags & D3D11_QUERY_MISC_PREDICATEHINT) ? PIPE_RENDER_COND_NO_WAIT : PIPE_RENDER_COND_WAIT;
1390					pipe->render_condition(pipe, predicate->query, mode);
1391				}
1392				else
1393				{
1394					/* TODO: add inverted predication to Gallium*/
1395					pipe->render_condition(pipe, 0, 0);
1396				}
1397			}
1398		}
1399	}
1400
1401	virtual void STDMETHODCALLTYPE SetPredication(
1402		ID3D11Predicate *new_predicate,
1403		BOOL new_predicate_value)
1404	{
1405		SYNCHRONIZED;
1406		if(render_predicate.p != new_predicate || render_predicate_value != new_predicate_value)
1407		{
1408			render_predicate = new_predicate;
1409			render_predicate_value = new_predicate_value;
1410			set_render_condition();
1411		}
1412	}
1413
1414	virtual void STDMETHODCALLTYPE GetPredication(
1415		ID3D11Predicate **out_predicate,
1416		BOOL *out_predicate_value)
1417	{
1418		SYNCHRONIZED;
1419		if(out_predicate)
1420			*out_predicate = render_predicate.ref();
1421		if(out_predicate_value)
1422			*out_predicate_value = render_predicate_value;
1423	}
1424
1425	static unsigned d3d11_subresource_to_level(struct pipe_resource* resource, unsigned subresource)
1426	{
1427		if(subresource <= resource->last_level)
1428		{
1429			return subresource;
1430		}
1431		else
1432		{
1433			unsigned levels = resource->last_level + 1;
1434			return subresource % levels;
1435		}
1436	}
1437
1438	static unsigned d3d11_subresource_to_layer(struct pipe_resource* resource, unsigned subresource)
1439	{
1440		if(subresource <= resource->last_level)
1441		{
1442			return 0;
1443		}
1444		else
1445		{
1446			unsigned levels = resource->last_level + 1;
1447			return subresource / levels;
1448		}
1449	}
1450
1451
1452	/* TODO: deferred contexts will need a different implementation of this,
1453	 * because we can't put the transfer info into the resource itself.
1454	 * Also, there are very different restrictions, for obvious reasons.
1455	 */
1456	virtual HRESULT STDMETHODCALLTYPE Map(
1457		ID3D11Resource *iresource,
1458		unsigned subresource,
1459		D3D11_MAP map_type,
1460		unsigned map_flags,
1461		D3D11_MAPPED_SUBRESOURCE *mapped_resource)
1462	{
1463		SYNCHRONIZED;
1464		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1465		if(resource->transfers.count(subresource))
1466			return E_FAIL;
1467		unsigned level = d3d11_subresource_to_level(resource->resource, subresource);
1468		unsigned layer = d3d11_subresource_to_layer(resource->resource, subresource);
1469		pipe_box box = d3d11_to_pipe_box(resource->resource, level, 0);
1470		box.z += layer;
1471		unsigned usage = 0;
1472		if(map_type == D3D11_MAP_READ)
1473			usage = PIPE_TRANSFER_READ;
1474		else if(map_type == D3D11_MAP_WRITE)
1475			usage = PIPE_TRANSFER_WRITE;
1476		else if(map_type == D3D11_MAP_READ_WRITE)
1477			usage = PIPE_TRANSFER_READ_WRITE;
1478		else if(map_type == D3D11_MAP_WRITE_DISCARD)
1479			usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD;
1480		else if(map_type == D3D11_MAP_WRITE_NO_OVERWRITE)
1481			usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_NOOVERWRITE;
1482		else
1483			return E_INVALIDARG;
1484		if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT)
1485			usage |= PIPE_TRANSFER_DONTBLOCK;
1486		struct pipe_transfer* transfer = pipe->get_transfer(pipe, resource->resource, level, usage, &box);
1487		if(!transfer) {
1488			if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT)
1489				return DXGI_ERROR_WAS_STILL_DRAWING;
1490			else
1491				return E_FAIL;
1492		}
1493		resource->transfers[subresource] = transfer;
1494		mapped_resource->pData = pipe->transfer_map(pipe, transfer);
1495		mapped_resource->RowPitch = transfer->stride;
1496		mapped_resource->DepthPitch = transfer->layer_stride;
1497		return S_OK;
1498	}
1499
1500	virtual void STDMETHODCALLTYPE Unmap(
1501		ID3D11Resource *iresource,
1502		unsigned subresource)
1503	{
1504		SYNCHRONIZED;
1505		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1506		std::unordered_map<unsigned, pipe_transfer*>::iterator i = resource->transfers.find(subresource);
1507		if(i != resource->transfers.end())
1508		{
1509			pipe->transfer_unmap(pipe, i->second);
1510			pipe->transfer_destroy(pipe, i->second);
1511			resource->transfers.erase(i);
1512		}
1513	}
1514
1515	virtual void STDMETHODCALLTYPE CopySubresourceRegion(
1516		ID3D11Resource *dst_resource,
1517		unsigned dst_subresource,
1518		unsigned dst_x,
1519		unsigned dst_y,
1520		unsigned dst_z,
1521		ID3D11Resource *src_resource,
1522		unsigned src_subresource,
1523		const D3D11_BOX *src_box)
1524	{
1525		SYNCHRONIZED;
1526		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1527		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1528		unsigned dst_level = d3d11_subresource_to_level(dst->resource, dst_subresource);
1529		unsigned dst_layer = d3d11_subresource_to_layer(dst->resource, dst_subresource);
1530		unsigned src_level = d3d11_subresource_to_level(src->resource, src_subresource);
1531		unsigned src_layer = d3d11_subresource_to_layer(src->resource, src_subresource);
1532		pipe_box box = d3d11_to_pipe_box(src->resource, src_level, src_box);
1533		dst_z += dst_layer;
1534		box.z += src_layer;
1535		{
1536			pipe->resource_copy_region(pipe,
1537				dst->resource, dst_level, dst_x, dst_y, dst_z,
1538				src->resource, src_level, &box);
1539		}
1540	}
1541
1542	virtual void STDMETHODCALLTYPE CopyResource(
1543		ID3D11Resource *dst_resource,
1544		ID3D11Resource *src_resource)
1545	{
1546		SYNCHRONIZED;
1547		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1548		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1549		unsigned level;
1550		for(level = 0; level <= dst->resource->last_level; ++level)
1551		{
1552			pipe_box box;
1553			box.x = box.y = box.z = 0;
1554			box.width = u_minify(dst->resource->width0, level);
1555			box.height = u_minify(dst->resource->height0, level);
1556			if(dst->resource->target == PIPE_TEXTURE_3D)
1557				box.depth = u_minify(dst->resource->depth0, level);
1558			else
1559				box.depth = dst->resource->array_size;
1560			pipe->resource_copy_region(pipe,
1561						   dst->resource, level, 0, 0, 0,
1562						   src->resource, level, &box);
1563		}
1564	}
1565
1566	virtual void STDMETHODCALLTYPE UpdateSubresource(
1567		ID3D11Resource *dst_resource,
1568		unsigned dst_subresource,
1569		const D3D11_BOX *pDstBox,
1570		const void *pSrcData,
1571		unsigned src_row_pitch,
1572		unsigned src_depth_pitch)
1573	{
1574		SYNCHRONIZED;
1575		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1576		unsigned dst_level = d3d11_subresource_to_level(dst->resource, dst_subresource);
1577		unsigned dst_layer = d3d11_subresource_to_layer(dst->resource, dst_subresource);
1578		pipe_box box = d3d11_to_pipe_box(dst->resource, dst_level, pDstBox);
1579		box.z += dst_layer;
1580		pipe->transfer_inline_write(pipe, dst->resource, dst_level, PIPE_TRANSFER_WRITE, &box, pSrcData, src_row_pitch, src_depth_pitch);
1581	}
1582
1583#if API >= 11
1584	virtual void STDMETHODCALLTYPE CopyStructureCount(
1585		ID3D11Buffer *dst_buffer,
1586		unsigned dst_aligned_byte_offset,
1587		ID3D11UnorderedAccessView *src_view)
1588	{
1589		SYNCHRONIZED;
1590	}
1591#endif
1592
1593	virtual void STDMETHODCALLTYPE ClearRenderTargetView(
1594		ID3D11RenderTargetView *render_target_view,
1595		const float color[4])
1596	{
1597		SYNCHRONIZED;
1598		GalliumD3D11RenderTargetView* view = ((GalliumD3D11RenderTargetView*)render_target_view);
1599		union pipe_color_union cc;
1600		cc.f[0] = color[0];
1601		cc.f[1] = color[1];
1602		cc.f[2] = color[2];
1603		cc.f[3] = color[3];
1604		pipe->clear_render_target(pipe, view->object, &cc, 0, 0, view->object->width, view->object->height);
1605	}
1606
1607	virtual void STDMETHODCALLTYPE ClearDepthStencilView(
1608		ID3D11DepthStencilView  *depth_stencil_view,
1609		unsigned clear_flags,
1610		float depth,
1611		UINT8 stencil)
1612	{
1613		SYNCHRONIZED;
1614		GalliumD3D11DepthStencilView* view = ((GalliumD3D11DepthStencilView*)depth_stencil_view);
1615		unsigned flags = 0;
1616		if(clear_flags & D3D11_CLEAR_DEPTH)
1617			flags |= PIPE_CLEAR_DEPTH;
1618		if(clear_flags & D3D11_CLEAR_STENCIL)
1619			flags |= PIPE_CLEAR_STENCIL;
1620		pipe->clear_depth_stencil(pipe, view->object, flags, depth, stencil, 0, 0, view->object->width, view->object->height);
1621	}
1622
1623#if API >= 11
1624	virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewUint(
1625		ID3D11UnorderedAccessView *unordered_access_view,
1626		const unsigned values[4])
1627	{
1628		SYNCHRONIZED;
1629	}
1630
1631	virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewFloat(
1632			ID3D11UnorderedAccessView *unordered_access_view,
1633			const float values[4])
1634	{
1635		SYNCHRONIZED;
1636	}
1637#endif
1638
1639	void restore_gallium_state_blit_only()
1640	{
1641		pipe->bind_blend_state(pipe, blend_state.p ? blend_state.p->object : default_blend);
1642		pipe->bind_depth_stencil_alpha_state(pipe, depth_stencil_state.p ? depth_stencil_state.p->object : default_depth_stencil);
1643		pipe->bind_rasterizer_state(pipe, rasterizer_state.p ? rasterizer_state.p->object : default_rasterizer);
1644		pipe->bind_vertex_elements_state(pipe, input_layout.p ? input_layout.p->object : default_input_layout);
1645		pipe->bind_fs_state(pipe, shaders[D3D11_STAGE_PS].p ? shaders[D3D11_STAGE_PS].p->object : default_shaders[PIPE_SHADER_FRAGMENT]);
1646		pipe->bind_vs_state(pipe, shaders[D3D11_STAGE_VS].p ? shaders[D3D11_STAGE_VS].p->object : default_shaders[PIPE_SHADER_VERTEX]);
1647		if(caps.gs)
1648			pipe->bind_gs_state(pipe, shaders[D3D11_STAGE_GS].p ? shaders[D3D11_STAGE_GS].p->object : default_shaders[PIPE_SHADER_GEOMETRY]);
1649		set_framebuffer();
1650		set_viewport();
1651		set_clip();
1652		set_render_condition();
1653		// TODO: restore stream output
1654
1655		update_flags |= UPDATE_VERTEX_BUFFERS | (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_PS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_PS));
1656	}
1657
1658	virtual void STDMETHODCALLTYPE RestoreGalliumStateBlitOnly()
1659	{
1660		SYNCHRONIZED;
1661		restore_gallium_state_blit_only();
1662	}
1663
1664	virtual void STDMETHODCALLTYPE GenerateMips(
1665		ID3D11ShaderResourceView *shader_resource_view)
1666	{
1667		SYNCHRONIZED;
1668
1669		GalliumD3D11ShaderResourceView* view = (GalliumD3D11ShaderResourceView*)shader_resource_view;
1670		if(caps.gs)
1671			pipe->bind_gs_state(pipe, 0);
1672		if(caps.so)
1673			pipe->bind_stream_output_state(pipe, 0);
1674		if(pipe->render_condition)
1675			pipe->render_condition(pipe, 0, 0);
1676		for(unsigned layer = view->object->u.tex.first_layer; layer <= view->object->u.tex.last_layer; ++layer)
1677			util_gen_mipmap(gen_mipmap, view->object, layer, view->object->u.tex.first_level, view->object->u.tex.last_level, PIPE_TEX_FILTER_LINEAR);
1678		restore_gallium_state_blit_only();
1679	}
1680
1681	virtual void STDMETHODCALLTYPE RestoreGalliumState()
1682	{
1683		SYNCHRONIZED;
1684		restore_gallium_state_blit_only();
1685
1686		set_index_buffer();
1687		set_stencil_ref();
1688		pipe->set_blend_color(pipe, (struct pipe_blend_color*)blend_color);
1689		pipe->set_sample_mask(pipe, sample_mask);
1690
1691		for(unsigned s = 0; s < 3; ++s)
1692		{
1693			unsigned num = std::min(caps.constant_buffers[s], (unsigned)D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
1694			for(unsigned i = 0; i < num; ++i)
1695				pipe->set_constant_buffer(pipe, s, i, constant_buffers[s][i].p ? constant_buffers[s][i].p->resource : 0);
1696		}
1697
1698		if(caps.so)
1699			pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1700
1701		update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_VS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_VS));
1702		update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_GS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_GS));
1703
1704		set_scissor();
1705	}
1706
1707#if API >= 11
1708	/* TODO: hack SRVs or sampler states to handle this, or add to Gallium */
1709	virtual void STDMETHODCALLTYPE SetResourceMinLOD(
1710		ID3D11Resource *iresource,
1711		float min_lod)
1712	{
1713		SYNCHRONIZED;
1714		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1715		if(resource->min_lod != min_lod)
1716		{
1717			// TODO: actually do anything?
1718			resource->min_lod = min_lod;
1719		}
1720	}
1721
1722	virtual float STDMETHODCALLTYPE GetResourceMinLOD(
1723		ID3D11Resource *iresource)
1724	{
1725		SYNCHRONIZED;
1726		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1727		return resource->min_lod;
1728	}
1729#endif
1730
1731	virtual void STDMETHODCALLTYPE ResolveSubresource(
1732		ID3D11Resource *dst_resource,
1733		unsigned dst_subresource,
1734		ID3D11Resource *src_resource,
1735		unsigned src_subresource,
1736		DXGI_FORMAT format)
1737	{
1738		SYNCHRONIZED;
1739		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1740		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1741		struct pipe_resolve_info info;
1742
1743		info.dst.res = dst->resource;
1744		info.src.res = src->resource;
1745		info.dst.level = 0;
1746		info.dst.layer = d3d11_subresource_to_layer(dst->resource, dst_subresource);
1747		info.src.layer = d3d11_subresource_to_layer(src->resource, src_subresource);
1748
1749		info.src.x0 = 0;
1750		info.src.x1 = info.src.res->width0;
1751		info.src.y0 = 0;
1752		info.src.y1 = info.src.res->height0;
1753		info.dst.x0 = 0;
1754		info.dst.x1 = info.dst.res->width0;
1755		info.dst.y0 = 0;
1756		info.dst.y1 = info.dst.res->height0;
1757
1758		info.mask = PIPE_MASK_RGBA | PIPE_MASK_ZS;
1759
1760		pipe->resource_resolve(pipe, &info);
1761	}
1762
1763#if API >= 11
1764	virtual void STDMETHODCALLTYPE ExecuteCommandList(
1765		ID3D11CommandList *command_list,
1766		BOOL restore_context_state)
1767	{
1768		SYNCHRONIZED;
1769	}
1770
1771	virtual HRESULT STDMETHODCALLTYPE FinishCommandList(
1772		BOOL restore_deferred_context_state,
1773		ID3D11CommandList **out_command_list)
1774	{
1775		SYNCHRONIZED;
1776		return E_NOTIMPL;
1777	}
1778#endif
1779
1780	virtual void STDMETHODCALLTYPE ClearState(void)
1781	{
1782		/* we don't take a lock here because we would deadlock otherwise
1783		 * TODO: this is probably incorrect, because ClearState should likely be atomic.
1784		 * However, I can't think of any correct usage that would be affected by this
1785		 * being non-atomic, and making this atomic is quite expensive and complicates
1786		 * the code
1787		 */
1788
1789		// we qualify all calls so that we avoid virtual dispatch and might get them inlined
1790		// TODO: make sure all this gets inlined, which might require more compiler flags
1791		// TODO: optimize this
1792#if API >= 11
1793		GalliumD3D11DeviceContext::PSSetShader(0, 0, 0);
1794		GalliumD3D11DeviceContext::GSSetShader(0, 0, 0);
1795		GalliumD3D11DeviceContext::VSSetShader(0, 0, 0);
1796		GalliumD3D11DeviceContext::HSSetShader(0, 0, 0);
1797		GalliumD3D11DeviceContext::DSSetShader(0, 0, 0);
1798		GalliumD3D11DeviceContext::CSSetShader(0, 0, 0);
1799#else
1800		GalliumD3D11DeviceContext::PSSetShader(0);
1801		GalliumD3D11DeviceContext::GSSetShader(0);
1802		GalliumD3D11DeviceContext::VSSetShader(0);
1803#endif
1804
1805		GalliumD3D11DeviceContext::IASetInputLayout(0);
1806		GalliumD3D11DeviceContext::IASetIndexBuffer(0, DXGI_FORMAT_UNKNOWN, 0);
1807		GalliumD3D11DeviceContext::RSSetState(0);
1808		GalliumD3D11DeviceContext::OMSetDepthStencilState(0, 0);
1809		GalliumD3D11DeviceContext::OMSetBlendState(0, (float*)zero_data, ~0);
1810		GalliumD3D11DeviceContext::SetPredication(0, 0);
1811		GalliumD3D11DeviceContext::IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_UNDEFINED);
1812
1813		GalliumD3D11DeviceContext::PSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1814		GalliumD3D11DeviceContext::GSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1815		GalliumD3D11DeviceContext::VSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1816#if API >= 11
1817		GalliumD3D11DeviceContext::HSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1818		GalliumD3D11DeviceContext::DSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1819		GalliumD3D11DeviceContext::CSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1820#endif
1821
1822		GalliumD3D11DeviceContext::IASetVertexBuffers(0, num_vertex_buffers, (ID3D11Buffer**)zero_data, (unsigned*)zero_data, (unsigned*)zero_data);
1823#if API >= 11
1824		GalliumD3D11DeviceContext::OMSetRenderTargetsAndUnorderedAccessViews(0, 0, 0 , 0, 0, 0, 0);
1825#else
1826		GalliumD3D11DeviceContext::OMSetRenderTargets(0, 0, 0 );
1827#endif
1828		GalliumD3D11DeviceContext::SOSetTargets(0, 0, 0);
1829
1830		GalliumD3D11DeviceContext::PSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11ShaderResourceView**)zero_data);
1831		GalliumD3D11DeviceContext::GSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11ShaderResourceView**)zero_data);
1832		GalliumD3D11DeviceContext::VSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11ShaderResourceView**)zero_data);
1833#if API >= 11
1834		GalliumD3D11DeviceContext::HSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11ShaderResourceView**)zero_data);
1835		GalliumD3D11DeviceContext::DSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11ShaderResourceView**)zero_data);
1836		GalliumD3D11DeviceContext::CSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11ShaderResourceView**)zero_data);
1837#endif
1838
1839		GalliumD3D11DeviceContext::PSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11SamplerState**)zero_data);
1840		GalliumD3D11DeviceContext::GSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11SamplerState**)zero_data);
1841		GalliumD3D11DeviceContext::VSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11SamplerState**)zero_data);
1842#if API >= 11
1843		GalliumD3D11DeviceContext::HSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11SamplerState**)zero_data);
1844		GalliumD3D11DeviceContext::DSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11SamplerState**)zero_data);
1845		GalliumD3D11DeviceContext::CSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11SamplerState**)zero_data);
1846#endif
1847
1848		GalliumD3D11DeviceContext::RSSetViewports(0, 0);
1849		GalliumD3D11DeviceContext::RSSetScissorRects(0, 0);
1850	}
1851
1852	virtual void STDMETHODCALLTYPE Flush(void)
1853	{
1854		SYNCHRONIZED;
1855                pipe->flush(pipe, 0);
1856	}
1857
1858	/* In Direct3D 10, if the reference count of an object drops to 0, it is automatically
1859	 * cleanly unbound from the pipeline.
1860	 * In Direct3D 11, the pipeline holds a reference.
1861	 *
1862	 * Note that instead of always scanning the pipeline on destruction, we could
1863	 * maintain the internal reference count on DirectX 10 and use it to check if an
1864	 * object is still bound.
1865	 * Presumably, on average, scanning is faster if the application is well written.
1866	 */
1867#if API < 11
1868#define IMPLEMENT_SIMPLE_UNBIND(name, member, gallium, def) \
1869	void Unbind##name(ID3D11##name* state) \
1870	{ \
1871		SYNCHRONIZED; \
1872		if((void*)state == (void*)member.p) \
1873		{ \
1874			member.p = 0; \
1875			pipe->bind_##gallium##_state(pipe, default_##def); \
1876		} \
1877	}
1878	IMPLEMENT_SIMPLE_UNBIND(BlendState, blend_state, blend, blend)
1879	IMPLEMENT_SIMPLE_UNBIND(RasterizerState, rasterizer_state, rasterizer, rasterizer)
1880	IMPLEMENT_SIMPLE_UNBIND(DepthStencilState, depth_stencil_state, depth_stencil_alpha, depth_stencil)
1881	IMPLEMENT_SIMPLE_UNBIND(InputLayout, input_layout, vertex_elements, input_layout)
1882	IMPLEMENT_SIMPLE_UNBIND(PixelShader, shaders[D3D11_STAGE_PS], fs, shaders[D3D11_STAGE_PS])
1883	IMPLEMENT_SIMPLE_UNBIND(VertexShader, shaders[D3D11_STAGE_VS], vs, shaders[D3D11_STAGE_VS])
1884	IMPLEMENT_SIMPLE_UNBIND(GeometryShader, shaders[D3D11_STAGE_GS], gs, shaders[D3D11_STAGE_GS])
1885
1886	void UnbindPredicate(ID3D11Predicate* predicate)
1887	{
1888		SYNCHRONIZED;
1889		if(predicate == render_predicate)
1890		{
1891			render_predicate.p = NULL;
1892			render_predicate_value = 0;
1893			pipe->render_condition(pipe, 0, 0);
1894		}
1895	}
1896
1897	void UnbindSamplerState(ID3D11SamplerState* state)
1898	{
1899		SYNCHRONIZED;
1900		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1901		{
1902			for(unsigned i = 0; i < num_samplers[s]; ++i)
1903			{
1904				if(samplers[s][i] == state)
1905				{
1906					samplers[s][i].p = NULL;
1907					sampler_csos[s][i] = NULL;
1908					update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s));
1909				}
1910			}
1911		}
1912	}
1913
1914	void UnbindBuffer(ID3D11Buffer* buffer)
1915	{
1916		SYNCHRONIZED;
1917		if(buffer == index_buffer)
1918		{
1919			index_buffer.p = 0;
1920			index_format = DXGI_FORMAT_UNKNOWN;
1921			index_offset = 0;
1922			struct pipe_index_buffer ib;
1923			memset(&ib, 0, sizeof(ib));
1924			pipe->set_index_buffer(pipe, &ib);
1925		}
1926
1927		for(unsigned i = 0; i < num_vertex_buffers; ++i)
1928		{
1929			if(buffer == input_buffers[i])
1930			{
1931				input_buffers[i].p = 0;
1932				memset(&vertex_buffers[num_vertex_buffers], 0, sizeof(vertex_buffers[num_vertex_buffers]));
1933				update_flags |= UPDATE_VERTEX_BUFFERS;
1934			}
1935		}
1936
1937		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1938		{
1939			for(unsigned i = 0; i < sizeof(constant_buffers) / sizeof(constant_buffers[0]); ++i)
1940			{
1941				if(constant_buffers[s][i] == buffer)
1942				{
1943					constant_buffers[s][i] = (ID3D10Buffer*)NULL;
1944					pipe->set_constant_buffer(pipe, s, i, NULL);
1945				}
1946			}
1947		}
1948	}
1949
1950	void UnbindDepthStencilView(ID3D11DepthStencilView * view)
1951	{
1952		SYNCHRONIZED;
1953		if(view == depth_stencil_view)
1954		{
1955			depth_stencil_view.p = NULL;
1956			set_framebuffer();
1957		}
1958	}
1959
1960	void UnbindRenderTargetView(ID3D11RenderTargetView* view)
1961	{
1962		SYNCHRONIZED;
1963		bool any_bound = false;
1964		for(unsigned i = 0; i < num_render_target_views; ++i)
1965		{
1966			if(render_target_views[i] == view)
1967			{
1968				render_target_views[i].p = NULL;
1969				any_bound = true;
1970			}
1971		}
1972		if(any_bound)
1973			set_framebuffer();
1974	}
1975
1976	void UnbindShaderResourceView(ID3D11ShaderResourceView* view)
1977	{
1978		SYNCHRONIZED;
1979		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1980		{
1981			for(unsigned i = 0; i < num_shader_resource_views[s]; ++i)
1982			{
1983				if(shader_resource_views[s][i] == view)
1984				{
1985					shader_resource_views[s][i].p = NULL;
1986					sampler_views[s][i] = NULL;
1987					update_flags |= (1 << (UPDATE_VIEWS_SHIFT + s));
1988				}
1989			}
1990		}
1991	}
1992#endif
1993
1994#undef SYNCHRONIZED
1995};
1996
1997#if API >= 11
1998/* This approach serves two purposes.
1999 * First, we don't want to do an atomic operation to manipulate the reference
2000 * count every time something is bound/unbound to the pipeline, since they are
2001 * expensive.
2002 * Fortunately, the immediate context can only be used by a single thread, so
2003 * we don't have to use them, as long as a separate reference count is used
2004 * (see dual_refcnt_t).
2005 *
2006 * Second, we want to avoid the Device -> DeviceContext -> bound DeviceChild -> Device
2007 * garbage cycle.
2008 * To avoid it, DeviceChild doesn't hold a reference to Device as usual, but adds
2009 * one for each external reference count, while internal nonatomic_add_ref doesn't
2010 * add any.
2011 *
 * Note that ideally we would like to eliminate the non-atomic op too, but this is more
2013 * complicated, since we would either need to use garbage collection and give up
2014 * deterministic destruction (especially bad for large textures), or scan the whole
2015 * pipeline state every time the reference count of object drops to 0, which risks
2016 * pathological slowdowns.
2017 *
2018 * Since this microoptimization should matter relatively little, let's avoid it for now.
2019 *
 * Note that deferred contexts don't use this, since as a whole, they must be thread-safe.
2021 * Eliminating the atomic ops for deferred contexts seems substantially harder.
2022 * This might be a problem if they are used in a one-shot multithreaded rendering
2023 * fashion, where SMP cacheline bouncing on the reference count may be visible.
2024 *
2025 * The idea would be to attach a structure of reference counts indexed by deferred
2026 * context id to each object. Ideally, this should be organized like ext2 block pointers.
2027 *
2028 * Every deferred context would get a reference count in its own cacheline.
2029 * The external count is protected by a lock bit, and there is also a "lock bit" in each
2030 * internal count.
2031 *
2032 * When the external count has to be dropped to 0, the lock bit is taken and all internal
2033 * reference counts are scanned, taking a count of them. A flag would also be set on them.
2034 * Deferred context manipulation would notice the flag, and update the count.
2035 * Once the count goes to zero, the object is freed.
2036 *
 * The problem with this is that if the external reference count ping-pongs between
2038 * zero and non-zero, the scans will take a lot of time.
2039 *
2040 * The idea to solve this is to compute the scans in a binary-tree like fashion, where
2041 * each binary tree node would have a "determined bit", which would be invalidated
2042 * by manipulations.
2043 *
2044 * However, all this complexity might actually be a loss in most cases, so let's just
2045 * stick to a single atomic refcnt for now.
2046 *
2047 * Also, we don't even support deferred contexts yet, so this can wait.
2048 */
2049struct nonatomic_device_child_ptr_traits
2050{
2051	static void add_ref(void* p)
2052	{
2053		if(p)
2054			((GalliumD3D11DeviceChild<>*)p)->nonatomic_add_ref();
2055	}
2056
2057	static void release(void* p)
2058	{
2059		if(p)
2060			((GalliumD3D11DeviceChild<>*)p)->nonatomic_release();
2061	}
2062};
2063
2064struct GalliumD3D11ImmediateDeviceContext
2065	: public GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>
2066{
2067	GalliumD3D11ImmediateDeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, unsigned context_flags = 0)
2068	: GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>(device, pipe, context_flags)
2069	{
2070		// not necessary, but tests that the API at least basically works
2071		ClearState();
2072	}
2073
2074	/* we do this since otherwise we would have a garbage cycle between this and the device */
2075	virtual ULONG STDMETHODCALLTYPE AddRef()
2076	{
2077		return this->device->AddRef();
2078	}
2079
2080	virtual ULONG STDMETHODCALLTYPE Release()
2081	{
2082		return this->device->Release();
2083	}
2084
2085	virtual D3D11_DEVICE_CONTEXT_TYPE STDMETHODCALLTYPE GetType()
2086	{
2087		return D3D11_DEVICE_CONTEXT_IMMEDIATE;
2088	}
2089};
2090
2091static ID3D11DeviceContext* GalliumD3D11ImmediateDeviceContext_Create(GalliumD3D11Screen* device, struct pipe_context* pipe, bool owns_pipe)
2092{
2093	return new GalliumD3D11ImmediateDeviceContext(device, pipe, owns_pipe);
2094}
2095
2096static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumState(ID3D11DeviceContext* context)
2097{
2098	((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumState();
2099}
2100
2101static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumStateBlitOnly(ID3D11DeviceContext* context)
2102{
2103	((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumStateBlitOnly();
2104}
2105
2106static void GalliumD3D11ImmediateDeviceContext_Destroy(ID3D11DeviceContext* context)
2107{
2108	delete (GalliumD3D11ImmediateDeviceContext*)context;
2109}
2110#endif
2111