d3d11_context.h revision 4a06248281f65d8a653c1bb744947bb6d47fdc9c
1/**************************************************************************
2 *
3 * Copyright 2010 Luca Barbieri
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
/* Table of null pointers handed to the Set* entry points to unbind things;
 * it needs 128 entries because of resources. */
static const void* zero_data[128];

/* Bit layout of update_flags: one "views dirty" bit per stage, then one
 * "samplers dirty" bit per stage, then a single "vertex buffers dirty" bit. */
#define UPDATE_VIEWS_SHIFT (D3D11_STAGES * 0)
#define UPDATE_SAMPLERS_SHIFT (D3D11_STAGES * 1)
#define UPDATE_VERTEX_BUFFERS (1 << (D3D11_STAGES * 2))
33
34#if API >= 11
35template<typename PtrTraits>
36struct GalliumD3D11DeviceContext :
37	public GalliumD3D11DeviceChild<ID3D11DeviceContext>
38{
39#else
40template<bool threadsafe>
41struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe>
42{
43	typedef simple_ptr_traits PtrTraits;
44	typedef GalliumD3D10Device GalliumD3D10DeviceContext;
45#endif
46
47	refcnt_ptr<GalliumD3D11Shader<>, PtrTraits> shaders[D3D11_STAGES];
48	refcnt_ptr<GalliumD3D11InputLayout, PtrTraits> input_layout;
49	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> index_buffer;
50	refcnt_ptr<GalliumD3D11RasterizerState, PtrTraits> rasterizer_state;
51	refcnt_ptr<GalliumD3D11DepthStencilState, PtrTraits> depth_stencil_state;
52	refcnt_ptr<GalliumD3D11BlendState, PtrTraits> blend_state;
53	refcnt_ptr<GalliumD3D11DepthStencilView, PtrTraits> depth_stencil_view;
54	refcnt_ptr<GalliumD3D11Predicate, PtrTraits> render_predicate;
55
56	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> constant_buffers[D3D11_STAGES][D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT];
57	refcnt_ptr<GalliumD3D11ShaderResourceView, PtrTraits> shader_resource_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
58	refcnt_ptr<GalliumD3D11SamplerState, PtrTraits> samplers[D3D11_STAGES][D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
59	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> input_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
60	refcnt_ptr<GalliumD3D11RenderTargetView, PtrTraits> render_target_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
61	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> so_targets[D3D11_SO_BUFFER_SLOT_COUNT];
62
63#if API >= 11
64	refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> cs_unordered_access_views[D3D11_PS_CS_UAV_REGISTER_COUNT];
65	refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> om_unordered_access_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
66#endif
67
68	D3D11_VIEWPORT viewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
69	D3D11_RECT scissor_rects[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
70	unsigned so_offsets[D3D11_SO_BUFFER_SLOT_COUNT];
71	D3D11_PRIMITIVE_TOPOLOGY primitive_topology;
72	DXGI_FORMAT index_format;
73	unsigned index_offset;
74	BOOL render_predicate_value;
75	float blend_color[4];
76	unsigned sample_mask;
77	unsigned stencil_ref;
78	bool depth_clamp;
79
80	void* default_input_layout;
81	void* default_rasterizer;
82	void* default_depth_stencil;
83	void* default_blend;
84	void* default_sampler;
85	void* ld_sampler;
86	void * default_shaders[D3D11_STAGES];
87
88	// derived state
89	int primitive_mode;
90	struct pipe_vertex_buffer vertex_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
91	struct pipe_resource* so_buffers[D3D11_SO_BUFFER_SLOT_COUNT];
92	struct pipe_sampler_view* sampler_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
93	struct
94	{
95		void* ld; // accessed with a -1 index from v
96		void* v[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
97	} sampler_csos[D3D11_STAGES];
98	struct pipe_resource * buffers[D3D11_SO_BUFFER_SLOT_COUNT];
99	unsigned num_shader_resource_views[D3D11_STAGES];
100	unsigned num_samplers[D3D11_STAGES];
101	unsigned num_vertex_buffers;
102	unsigned num_render_target_views;
103	unsigned num_viewports;
104	unsigned num_scissor_rects;
105	unsigned num_so_targets;
106
107	struct pipe_context* pipe;
108	unsigned update_flags;
109
110	bool owns_pipe;
111	unsigned context_flags;
112
113	GalliumD3D11Caps caps;
114
115	cso_context* cso_ctx;
116	gen_mipmap_state* gen_mipmap;
117
118#if API >= 11
119#define SYNCHRONIZED do {} while(0)
120
121	GalliumD3D11DeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, bool owns_pipe, unsigned context_flags = 0)
122	: GalliumD3D11DeviceChild<ID3D11DeviceContext>(device), pipe(pipe), owns_pipe(owns_pipe), context_flags(context_flags)
123	{
124		caps = device->screen_caps;
125		init_context();
126	}
127
128	~GalliumD3D11DeviceContext()
129	{
130		destroy_context();
131	}
132#else
133#define SYNCHRONIZED lock_t<maybe_mutex_t<threadsafe> > lock_(this->mutex)
134
135	GalliumD3D10Device(pipe_screen* screen, pipe_context* pipe, bool owns_pipe, unsigned creation_flags, IDXGIAdapter* adapter)
136	: GalliumD3D10ScreenImpl<threadsafe>(screen, pipe, owns_pipe, creation_flags, adapter), pipe(pipe), owns_pipe(owns_pipe), context_flags(0)
137	{
138		caps = this->screen_caps;
139		init_context();
140	}
141
142	~GalliumD3D10Device()
143	{
144		destroy_context();
145	}
146#endif
147
148	void init_context()
149	{
150		if(!pipe->begin_query)
151			caps.queries = false;
152		if(!pipe->render_condition)
153			caps.render_condition = false;
154		if(!pipe->bind_gs_state)
155		{
156			caps.gs = false;
157			caps.stages = 2;
158		}
159		if(!pipe->set_stream_output_buffers)
160			caps.so = false;
161		if(!pipe->set_geometry_sampler_views)
162			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_GEOMETRY);
163		if(!pipe->set_fragment_sampler_views)
164			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_FRAGMENT);
165		if(!pipe->set_vertex_sampler_views)
166			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_VERTEX);
167
168		update_flags = 0;
169
170		// pipeline state
171		memset(viewports, 0, sizeof(viewports));
172		memset(scissor_rects, 0, sizeof(scissor_rects));
173		memset(so_offsets, 0, sizeof(so_offsets));
174		primitive_topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
175		index_format = DXGI_FORMAT_UNKNOWN;
176		index_offset = 0;
177		render_predicate_value = 0;
178		memset(blend_color, 0, sizeof(blend_color));
179		sample_mask = ~0;
180		stencil_ref = 0;
181		depth_clamp = 0;
182
183		// derived state
184		primitive_mode = 0;
185		memset(vertex_buffers, 0, sizeof(vertex_buffers));
186		memset(so_buffers, 0, sizeof(so_buffers));
187		memset(sampler_views, 0, sizeof(sampler_views));
188		memset(sampler_csos, 0, sizeof(sampler_csos));
189		memset(num_shader_resource_views, 0, sizeof(num_shader_resource_views));
190		memset(num_samplers, 0, sizeof(num_samplers));
191		num_vertex_buffers = 0;
192		num_render_target_views = 0;
193		num_viewports = 0;
194		num_scissor_rects = 0;
195		num_so_targets = 0;
196
197		default_input_layout = pipe->create_vertex_elements_state(pipe, 0, 0);
198
199		struct pipe_rasterizer_state rasterizerd;
200		memset(&rasterizerd, 0, sizeof(rasterizerd));
201		rasterizerd.gl_rasterization_rules = 1;
202		rasterizerd.cull_face = PIPE_FACE_BACK;
203		rasterizerd.flatshade_first = 1;
204		rasterizerd.line_width = 1.0f;
205		rasterizerd.point_size = 1.0f;
206		default_rasterizer = pipe->create_rasterizer_state(pipe, &rasterizerd);
207
208		struct pipe_depth_stencil_alpha_state depth_stencild;
209		memset(&depth_stencild, 0, sizeof(depth_stencild));
210		depth_stencild.depth.enabled = TRUE;
211		depth_stencild.depth.writemask = 1;
212		depth_stencild.depth.func = PIPE_FUNC_LESS;
213		default_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &depth_stencild);
214
215		struct pipe_blend_state blendd;
216		memset(&blendd, 0, sizeof(blendd));
217		blendd.rt[0].colormask = 0xf;
218		default_blend = pipe->create_blend_state(pipe, &blendd);
219
220		struct pipe_sampler_state samplerd;
221		memset(&samplerd, 0, sizeof(samplerd));
222		samplerd.normalized_coords = 1;
223		samplerd.min_img_filter = PIPE_TEX_FILTER_LINEAR;
224		samplerd.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
225		samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR;
226		samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
227		samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
228		samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
229		samplerd.border_color[0] = 1.0f;
230		samplerd.border_color[1] = 1.0f;
231		samplerd.border_color[2] = 1.0f;
232		samplerd.border_color[3] = 1.0f;
233		samplerd.min_lod = -FLT_MAX;
234		samplerd.max_lod = FLT_MAX;
235		samplerd.max_anisotropy = 1;
236		default_sampler = pipe->create_sampler_state(pipe, &samplerd);
237
238		memset(&samplerd, 0, sizeof(samplerd));
239		samplerd.normalized_coords = 0;
240		samplerd.min_img_filter = PIPE_TEX_FILTER_NEAREST;
241		samplerd.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
242		samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
243		samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
244		samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
245		samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
246		samplerd.min_lod = -FLT_MAX;
247		samplerd.max_lod = FLT_MAX;
248		samplerd.max_anisotropy = 1;
249		ld_sampler = pipe->create_sampler_state(pipe, &samplerd);
250
251		for(unsigned s = 0; s < D3D11_STAGES; ++s)
252		{
253			sampler_csos[s].ld = ld_sampler;
254			for(unsigned i = 0; i < D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; ++i)
255				sampler_csos[s].v[i] = default_sampler;
256		}
257
258		// TODO: should this really be empty shaders, or should they be all-passthrough?
259		memset(default_shaders, 0, sizeof(default_shaders));
260		struct ureg_program *ureg;
261		ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
262		ureg_END(ureg);
263		default_shaders[PIPE_SHADER_FRAGMENT] = ureg_create_shader_and_destroy(ureg, pipe);
264
265		ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
266		ureg_END(ureg);
267		default_shaders[PIPE_SHADER_VERTEX] = ureg_create_shader_and_destroy(ureg, pipe);
268
269		cso_ctx = cso_create_context(pipe);
270		gen_mipmap = util_create_gen_mipmap(pipe, cso_ctx);
271
272		RestoreGalliumState();
273	}
274
275	void destroy_context()
276	{
277		util_destroy_gen_mipmap(gen_mipmap);
278		cso_destroy_context(cso_ctx);
279
280		pipe->bind_vertex_elements_state(pipe, 0);
281		pipe->delete_vertex_elements_state(pipe, default_input_layout);
282
283		pipe->bind_rasterizer_state(pipe, 0);
284		pipe->delete_rasterizer_state(pipe, default_rasterizer);
285
286		pipe->bind_depth_stencil_alpha_state(pipe, 0);
287		pipe->delete_depth_stencil_alpha_state(pipe, default_depth_stencil);
288
289		pipe->bind_blend_state(pipe, 0);
290		pipe->delete_blend_state(pipe, default_blend);
291
292		pipe->bind_fragment_sampler_states(pipe, 0, 0);
293		pipe->bind_vertex_sampler_states(pipe, 0, 0);
294		if(pipe->bind_geometry_sampler_states)
295			pipe->bind_geometry_sampler_states(pipe, 0, 0);
296		pipe->delete_sampler_state(pipe, default_sampler);
297		pipe->delete_sampler_state(pipe, ld_sampler);
298
299		pipe->bind_fs_state(pipe, 0);
300		pipe->delete_fs_state(pipe, default_shaders[PIPE_SHADER_FRAGMENT]);
301
302		pipe->bind_vs_state(pipe, 0);
303		pipe->delete_vs_state(pipe, default_shaders[PIPE_SHADER_VERTEX]);
304
305		if(owns_pipe)
306			pipe->destroy(pipe);
307	}
308
309	virtual unsigned STDMETHODCALLTYPE GetContextFlags(void)
310	{
311		return context_flags;
312	}
/* Trailing parameters of XSSetShader / XSGetShader: the D3D11 build appends
 * the class-instance array and its count, other builds append nothing. */
#if API >= 11
#define SET_SHADER_EXTRA_ARGS \
	, ID3D11ClassInstance *const *ppClassInstances \
	, unsigned count
#define GET_SHADER_EXTRA_ARGS \
	, ID3D11ClassInstance **ppClassInstances \
	, unsigned *out_count
#else
#define SET_SHADER_EXTRA_ARGS
#define GET_SHADER_EXTRA_ARGS
#endif
324
325/* On Windows D3D11, SetConstantBuffers and SetShaderResources crash if passed a null pointer.
326 * Instead, you have to pass a pointer to nulls to unbind things.
327 * We do the same.
328 * TODO: is D3D10 the same?
329 */
330	template<unsigned s>
331	void xs_set_shader(GalliumD3D11Shader<>* shader)
332	{
333		if(shader != shaders[s].p)
334		{
335			shaders[s] = shader;
336			void* shader_cso = shader ? shader->object : default_shaders[s];
337			switch(s)
338			{
339			case PIPE_SHADER_VERTEX:
340				pipe->bind_vs_state(pipe, shader_cso);
341				break;
342			case PIPE_SHADER_FRAGMENT:
343				pipe->bind_fs_state(pipe, shader_cso);
344				break;
345			case PIPE_SHADER_GEOMETRY:
346				pipe->bind_gs_state(pipe, shader_cso);
347				break;
348			}
349			update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s)) | (1 << (UPDATE_VIEWS_SHIFT + s));
350		}
351	}
352
353	template<unsigned s>
354	void xs_set_constant_buffers(unsigned start, unsigned count, GalliumD3D11Buffer *const *constbufs)
355	{
356		for(unsigned i = 0; i < count; ++i)
357		{
358			if(constbufs[i] != constant_buffers[s][start + i].p)
359			{
360				constant_buffers[s][start + i] = constbufs[i];
361				if(s < caps.stages && start + i < caps.constant_buffers[s])
362					pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL);
363			}
364		}
365	}
366
367	template<unsigned s>
368	void xs_set_shader_resources(unsigned start, unsigned count, GalliumD3D11ShaderResourceView *const *srvs)
369	{
370		int last_different = -1;
371		for(unsigned i = 0; i < count; ++i)
372		{
373			if(shader_resource_views[s][start + i].p != srvs[i])
374			{
375				shader_resource_views[s][start + i] = srvs[i];
376				sampler_views[s][start + i] = srvs[i] ? srvs[i]->object : 0;
377				last_different = i;
378			}
379		}
380		if(last_different >= 0)
381		{
382			num_shader_resource_views[s] = std::max(num_shader_resource_views[s], start + last_different + 1);
383			update_flags |= 1 << (UPDATE_VIEWS_SHIFT + s);
384		}
385	}
386
387	template<unsigned s>
388	void xs_set_samplers(unsigned start, unsigned count, GalliumD3D11SamplerState *const *samps)
389	{
390		int last_different = -1;
391		for(unsigned i = 0; i < count; ++i)
392		{
393			if(samplers[s][start + i].p != samps[i])
394			{
395				samplers[s][start + i] = samps[i];
396				sampler_csos[s].v[start + i] = samps[i] ? samps[i]->object : default_sampler;
397				last_different = i;
398			}
399			if(last_different >= 0)
400			{
401				num_samplers[s] = std::max(num_samplers[s], start + last_different + 1);
402				update_flags |= 1 << (UPDATE_SAMPLERS_SHIFT + s);
403			}
404		}
405	}
406
407#define IMPLEMENT_SHADER_STAGE(XS, Stage) \
408	virtual void STDMETHODCALLTYPE XS##SetShader( \
409		ID3D11##Stage##Shader *pShader \
410		SET_SHADER_EXTRA_ARGS) \
411	{ \
412		SYNCHRONIZED; \
413		xs_set_shader<D3D11_STAGE_##XS>((GalliumD3D11Shader<>*)pShader); \
414	} \
415	virtual void STDMETHODCALLTYPE XS##GetShader(\
416		ID3D11##Stage##Shader **ppShader \
417		GET_SHADER_EXTRA_ARGS) \
418	{ \
419		SYNCHRONIZED; \
420		*ppShader = (ID3D11##Stage##Shader*)shaders[D3D11_STAGE_##XS].ref(); \
421	} \
422	virtual void STDMETHODCALLTYPE XS##SetConstantBuffers(\
423		unsigned start, \
424		unsigned count, \
425		ID3D11Buffer *const* constant_buffers) \
426	{ \
427		SYNCHRONIZED; \
428		xs_set_constant_buffers<D3D11_STAGE_##XS>(start, count, (GalliumD3D11Buffer *const *)constant_buffers); \
429	} \
430	virtual void STDMETHODCALLTYPE XS##GetConstantBuffers(\
431		unsigned start, \
432		unsigned count, \
433		ID3D11Buffer **out_constant_buffers) \
434	{ \
435		SYNCHRONIZED; \
436		for(unsigned i = 0; i < count; ++i) \
437			out_constant_buffers[i] = constant_buffers[D3D11_STAGE_##XS][start + i].ref(); \
438	} \
439	virtual void STDMETHODCALLTYPE XS##SetShaderResources(\
440		unsigned start, \
441		unsigned count, \
442		ID3D11ShaderResourceView *const *new_shader_resource_views) \
443	{ \
444		SYNCHRONIZED; \
445		xs_set_shader_resources<D3D11_STAGE_##XS>(start, count, (GalliumD3D11ShaderResourceView *const *)new_shader_resource_views); \
446	} \
447	virtual void STDMETHODCALLTYPE XS##GetShaderResources(\
448		unsigned start, \
449		unsigned count, \
450		ID3D11ShaderResourceView **out_shader_resource_views) \
451	{ \
452		SYNCHRONIZED; \
453		for(unsigned i = 0; i < count; ++i) \
454			out_shader_resource_views[i] = shader_resource_views[D3D11_STAGE_##XS][start + i].ref(); \
455	} \
456	virtual void STDMETHODCALLTYPE XS##SetSamplers(\
457		unsigned start, \
458		unsigned count, \
459		ID3D11SamplerState *const *new_samplers) \
460	{ \
461		SYNCHRONIZED; \
462		xs_set_samplers<D3D11_STAGE_##XS>(start, count, (GalliumD3D11SamplerState *const *)new_samplers); \
463	} \
464	virtual void STDMETHODCALLTYPE XS##GetSamplers( \
465		unsigned start, \
466		unsigned count, \
467		ID3D11SamplerState **out_samplers) \
468	{ \
469		SYNCHRONIZED; \
470		for(unsigned i = 0; i < count; ++i) \
471			out_samplers[i] = samplers[D3D11_STAGE_##XS][start + i].ref(); \
472	}
473
474#define DO_VS(x) x
475#define DO_GS(x) do {if(caps.gs) {x;}} while(0)
476#define DO_PS(x) x
477#define DO_HS(x)
478#define DO_DS(x)
479#define DO_CS(x)
480	IMPLEMENT_SHADER_STAGE(VS, Vertex)
481	IMPLEMENT_SHADER_STAGE(GS, Geometry)
482	IMPLEMENT_SHADER_STAGE(PS, Pixel)
483
#if API >= 11
	/* Stages that only exist in the D3D11 interface. */
	IMPLEMENT_SHADER_STAGE(HS, Hull)
	IMPLEMENT_SHADER_STAGE(DS, Domain)
	IMPLEMENT_SHADER_STAGE(CS, Compute)

	/* Stores `count` compute-stage UAVs starting at slot `start`.
	 * The views are only recorded; `new_uav_initial_counts` is not used.
	 */
	virtual void STDMETHODCALLTYPE CSSetUnorderedAccessViews(
		unsigned start,
		unsigned count,
		ID3D11UnorderedAccessView *const *new_unordered_access_views,
		const unsigned *new_uav_initial_counts)
	{
		SYNCHRONIZED;
		for(unsigned slot = 0; slot < count; ++slot)
		{
			cs_unordered_access_views[start + slot] = new_unordered_access_views[slot];
		}
	}

	/* Copies `count` recorded compute-stage UAVs, starting at slot `start`,
	 * into `out_unordered_access_views` via refcnt_ptr::ref().
	 */
	virtual void STDMETHODCALLTYPE CSGetUnorderedAccessViews(
		unsigned start,
		unsigned count,
		ID3D11UnorderedAccessView **out_unordered_access_views)
	{
		SYNCHRONIZED;
		for(unsigned slot = 0; slot < count; ++slot)
		{
			out_unordered_access_views[slot] = cs_unordered_access_views[start + slot].ref();
		}
	}
#endif
510
511	template<unsigned s>
512	void update_stage()
513	{
514		if(update_flags & (1 << (UPDATE_VIEWS_SHIFT + s)))
515		{
516			while(num_shader_resource_views[s] && !sampler_views[s][num_shader_resource_views[s] - 1]) \
517				--num_shader_resource_views[s];
518			if((1 << s) & caps.stages_with_sampling)
519			{
520				struct pipe_sampler_view* views_to_bind[PIPE_MAX_SAMPLERS];
521				unsigned num_views_to_bind = shaders[s] ? shaders[s]->slot_to_resource.size() : 0;
522				for(unsigned i = 0; i < num_views_to_bind; ++i)
523				{
524					views_to_bind[i] = sampler_views[s][shaders[s]->slot_to_resource[i]];
525				}
526				switch(s)
527				{
528				case PIPE_SHADER_VERTEX:
529					pipe->set_vertex_sampler_views(pipe, num_views_to_bind, views_to_bind);
530					break;
531				case PIPE_SHADER_FRAGMENT:
532					pipe->set_fragment_sampler_views(pipe, num_views_to_bind, views_to_bind);
533					break;
534				case PIPE_SHADER_GEOMETRY:
535					pipe->set_geometry_sampler_views(pipe, num_views_to_bind, views_to_bind);
536					break;
537				}
538			}
539		}
540
541		if(update_flags & (1 << (UPDATE_SAMPLERS_SHIFT + s)))
542		{
543			while(num_samplers[s] && !sampler_csos[s].v[num_samplers[s] - 1])
544				--num_samplers[s];
545			if((1 << s) & caps.stages_with_sampling)
546			{
547				void* samplers_to_bind[PIPE_MAX_SAMPLERS];
548				unsigned num_samplers_to_bind = shaders[s] ? shaders[s]->slot_to_sampler.size() : 0;
549				for(unsigned i = 0; i < num_samplers_to_bind; ++i)
550				{
551					// index can be -1 to access sampler_csos[s].ld
552					samplers_to_bind[i] = *(sampler_csos[s].v + shaders[s]->slot_to_sampler[i]);
553				}
554				switch(s)
555				{
556				case PIPE_SHADER_VERTEX:
557					pipe->bind_vertex_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
558					break;
559				case PIPE_SHADER_FRAGMENT:
560					pipe->bind_fragment_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
561					break;
562				case PIPE_SHADER_GEOMETRY:
563					pipe->bind_geometry_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
564					break;
565				}
566			}
567		}
568	}
569
570	void update_state()
571	{
572		update_stage<D3D11_STAGE_PS>();
573		update_stage<D3D11_STAGE_VS>();
574		update_stage<D3D11_STAGE_GS>();
575#if API >= 11
576		update_stage<D3D11_STAGE_HS>();
577		update_stage<D3D11_STAGE_DS>();
578		update_stage<D3D11_STAGE_CS>();
579#endif
580
581		if(update_flags & UPDATE_VERTEX_BUFFERS)
582		{
583			while(num_vertex_buffers && !vertex_buffers[num_vertex_buffers - 1].buffer)
584				--num_vertex_buffers;
585			pipe->set_vertex_buffers(pipe, num_vertex_buffers, vertex_buffers);
586		}
587
588		update_flags = 0;
589	}
590
591	virtual void STDMETHODCALLTYPE IASetInputLayout(
592		ID3D11InputLayout *new_input_layout)
593	{
594		SYNCHRONIZED;
595		if(new_input_layout != input_layout.p)
596		{
597			input_layout = new_input_layout;
598			pipe->bind_vertex_elements_state(pipe, new_input_layout ? ((GalliumD3D11InputLayout*)new_input_layout)->object : default_input_layout);
599		}
600	}
601
602	virtual void STDMETHODCALLTYPE IAGetInputLayout(
603		ID3D11InputLayout **out_input_layout)
604	{
605		SYNCHRONIZED;
606		*out_input_layout = input_layout.ref();
607	}
608
609	virtual void STDMETHODCALLTYPE IASetVertexBuffers(
610		unsigned start,
611		unsigned count,
612		ID3D11Buffer *const *new_vertex_buffers,
613		const unsigned *new_strides,
614		const unsigned *new_offsets)
615	{
616		SYNCHRONIZED;
617		int last_different = -1;
618		for(unsigned i = 0; i < count; ++i)
619		{
620			ID3D11Buffer* buffer = new_vertex_buffers[i];
621			if(buffer != input_buffers[start + i].p
622				|| vertex_buffers[start + i].buffer_offset != new_offsets[i]
623				|| vertex_buffers[start + i].stride != new_offsets[i]
624			)
625			{
626				input_buffers[start + i] = buffer;
627				vertex_buffers[start + i].buffer = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
628				vertex_buffers[start + i].buffer_offset = new_offsets[i];
629				vertex_buffers[start + i].stride = new_strides[i];
630				last_different = i;
631			}
632		}
633		if(last_different >= 0)
634		{
635			num_vertex_buffers = std::max(num_vertex_buffers, start + count);
636			update_flags |= UPDATE_VERTEX_BUFFERS;
637		}
638	}
639
640	virtual void STDMETHODCALLTYPE IAGetVertexBuffers(
641		unsigned start,
642		unsigned count,
643		ID3D11Buffer **out_vertex_buffers,
644		unsigned *out_strides,
645		unsigned *out_offsets)
646	{
647		SYNCHRONIZED;
648		if(out_vertex_buffers)
649		{
650			for(unsigned i = 0; i < count; ++i)
651				out_vertex_buffers[i] = input_buffers[start + i].ref();
652		}
653
654		if(out_offsets)
655		{
656			for(unsigned i = 0; i < count; ++i)
657				out_offsets[i] = vertex_buffers[start + i].buffer_offset;
658		}
659
660		if(out_strides)
661		{
662			for(unsigned i = 0; i < count; ++i)
663				out_strides[i] = vertex_buffers[start + i].stride;
664		}
665	}
666
667	void set_index_buffer()
668	{
669		pipe_index_buffer ib;
670		if(!index_buffer)
671		{
672			memset(&ib, 0, sizeof(ib));
673		}
674		else
675		{
676			if(index_format == DXGI_FORMAT_R32_UINT)
677				ib.index_size = 4;
678			else if(index_format == DXGI_FORMAT_R16_UINT)
679				ib.index_size = 2;
680			else
681				ib.index_size = 1;
682			ib.offset = index_offset;
683			ib.buffer = index_buffer ? ((GalliumD3D11Buffer*)index_buffer.p)->resource : 0;
684		}
685		pipe->set_index_buffer(pipe, &ib);
686	}
687
688	virtual void STDMETHODCALLTYPE IASetIndexBuffer(
689		ID3D11Buffer *new_index_buffer,
690		DXGI_FORMAT new_index_format,
691		unsigned new_index_offset)
692	{
693		SYNCHRONIZED;
694		if(index_buffer.p != new_index_buffer || index_format != new_index_format || index_offset != new_index_offset)
695		{
696			index_buffer = new_index_buffer;
697			index_format = new_index_format;
698			index_offset = new_index_offset;
699
700			set_index_buffer();
701		}
702	}
703
704	virtual void STDMETHODCALLTYPE IAGetIndexBuffer(
705		ID3D11Buffer **out_index_buffer,
706		DXGI_FORMAT *out_index_format,
707		unsigned *out_index_offset)
708	{
709		SYNCHRONIZED;
710		if(out_index_buffer)
711			*out_index_buffer = index_buffer.ref();
712		if(out_index_format)
713			*out_index_format = index_format;
714		if(out_index_offset)
715			*out_index_offset = index_offset;
716	}
717
718	virtual void STDMETHODCALLTYPE IASetPrimitiveTopology(
719		D3D11_PRIMITIVE_TOPOLOGY new_primitive_topology)
720	{
721		SYNCHRONIZED;
722		if(primitive_topology != new_primitive_topology)
723		{
724			if(new_primitive_topology < D3D_PRIMITIVE_TOPOLOGY_COUNT)
725				primitive_mode = d3d_to_pipe_prim[new_primitive_topology];
726			else
727				primitive_mode = 0;
728			primitive_topology = new_primitive_topology;
729		}
730	}
731
732	virtual void STDMETHODCALLTYPE IAGetPrimitiveTopology(
733		D3D11_PRIMITIVE_TOPOLOGY *out_primitive_topology)
734	{
735		SYNCHRONIZED;
736		*out_primitive_topology = primitive_topology;
737	}
738
739	virtual void STDMETHODCALLTYPE DrawIndexed(
740		unsigned index_count,
741		unsigned start_index_location,
742		int base_vertex_location)
743	{
744		SYNCHRONIZED;
745		if(update_flags)
746			update_state();
747
748		pipe_draw_info info;
749		info.mode = primitive_mode;
750		info.indexed = TRUE;
751		info.count = index_count;
752		info.start = start_index_location;
753		info.index_bias = base_vertex_location;
754		info.min_index = 0;
755		info.max_index = ~0;
756		info.start_instance = 0;
757		info.instance_count = 1;
758
759		pipe->draw_vbo(pipe, &info);
760	}
761
762	virtual void STDMETHODCALLTYPE Draw(
763		unsigned vertex_count,
764		unsigned start_vertex_location)
765	{
766		SYNCHRONIZED;
767		if(update_flags)
768			update_state();
769
770		pipe_draw_info info;
771		info.mode = primitive_mode;
772		info.indexed = FALSE;
773		info.count = vertex_count;
774		info.start = start_vertex_location;
775		info.index_bias = 0;
776		info.min_index = 0;
777		info.max_index = ~0;
778		info.start_instance = 0;
779		info.instance_count = 1;
780
781		pipe->draw_vbo(pipe, &info);
782	}
783
784	virtual void STDMETHODCALLTYPE DrawIndexedInstanced(
785		unsigned index_countPerInstance,
786		unsigned instance_count,
787		unsigned start_index_location,
788		int base_vertex_location,
789		unsigned start_instance_location)
790	{
791		SYNCHRONIZED;
792		if(update_flags)
793			update_state();
794
795		pipe_draw_info info;
796		info.mode = primitive_mode;
797		info.indexed = TRUE;
798		info.count = index_countPerInstance;
799		info.start = start_index_location;
800		info.index_bias = base_vertex_location;
801		info.min_index = 0;
802		info.max_index = ~0;
803		info.start_instance = start_instance_location;
804		info.instance_count = instance_count;
805
806		pipe->draw_vbo(pipe, &info);
807	}
808
809	virtual void STDMETHODCALLTYPE DrawInstanced(
810		unsigned vertex_countPerInstance,
811		unsigned instance_count,
812		unsigned start_vertex_location,
813		unsigned start_instance_location)
814	{
815		SYNCHRONIZED;
816		if(update_flags)
817			update_state();
818
819		pipe_draw_info info;
820		info.mode = primitive_mode;
821		info.indexed = FALSE;
822		info.count = vertex_countPerInstance;
823		info.start = start_vertex_location;
824		info.index_bias = 0;
825		info.min_index = 0;
826		info.max_index = ~0;
827		info.start_instance = start_instance_location;
828		info.instance_count = instance_count;
829
830		pipe->draw_vbo(pipe, &info);
831	}
832
833	virtual void STDMETHODCALLTYPE DrawAuto(void)
834	{
835		if(!caps.so)
836			return;
837
838		SYNCHRONIZED;
839		if(update_flags)
840			update_state();
841
842		pipe->draw_stream_output(pipe, primitive_mode);
843	}
844
845	virtual void STDMETHODCALLTYPE DrawIndexedInstancedIndirect(
846		ID3D11Buffer *buffer,
847		unsigned aligned_byte_offset)
848	{
849		SYNCHRONIZED;
850		if(update_flags)
851			update_state();
852
853		struct {
854			unsigned count;
855			unsigned instance_count;
856			unsigned start;
857			unsigned index_bias;
858		} data;
859
860		pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data);
861
862		pipe_draw_info info;
863		info.mode = primitive_mode;
864		info.indexed = TRUE;
865		info.start = data.start;
866		info.count = data.count;
867		info.index_bias = data.index_bias;
868		info.min_index = 0;
869		info.max_index = ~0;
870		info.start_instance = 0;
871		info.instance_count = data.instance_count;
872
873		pipe->draw_vbo(pipe, &info);
874	}
875
876	virtual void STDMETHODCALLTYPE DrawInstancedIndirect(
877		ID3D11Buffer *buffer,
878		unsigned aligned_byte_offset)
879	{
880		SYNCHRONIZED;
881		if(update_flags)
882			update_state();
883
884		struct {
885			unsigned count;
886			unsigned instance_count;
887			unsigned start;
888		} data;
889
890		pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data);
891
892		pipe_draw_info info;
893		info.mode = primitive_mode;
894		info.indexed = FALSE;
895		info.start = data.start;
896		info.count = data.count;
897		info.index_bias = 0;
898		info.min_index = 0;
899		info.max_index = ~0;
900		info.start_instance = 0;
901		info.instance_count = data.instance_count;
902
903		pipe->draw_vbo(pipe, &info);
904	}
905
#if API >= 11
	/* Compute dispatch: not implemented, the call is silently ignored.
	 * When this is implemented, start the body with:
	 *     SYNCHRONIZED;
	 *     if(update_flags)
	 *         update_state();
	 */
	virtual void STDMETHODCALLTYPE Dispatch(
		unsigned thread_group_count_x,
		unsigned thread_group_count_y,
		unsigned thread_group_count_z)
	{
	}

	/* Indirect compute dispatch: not implemented, the call is silently ignored.
	 * When this is implemented, start the body with:
	 *     SYNCHRONIZED;
	 *     if(update_flags)
	 *         update_state();
	 */
	virtual void STDMETHODCALLTYPE DispatchIndirect(
		ID3D11Buffer *buffer,
		unsigned aligned_byte_offset)
	{
	}
#endif
928
929	void set_clip()
930	{
931		pipe_clip_state clip;
932		clip.nr = 0;
933		clip.depth_clamp = depth_clamp;
934		pipe->set_clip_state(pipe, &clip);
935	}
936
937	virtual void STDMETHODCALLTYPE RSSetState(
938		ID3D11RasterizerState *new_rasterizer_state)
939	{
940		SYNCHRONIZED;
941		if(new_rasterizer_state != rasterizer_state.p)
942		{
943			rasterizer_state = new_rasterizer_state;
944			pipe->bind_rasterizer_state(pipe, new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->object : default_rasterizer);
945			bool new_depth_clamp = new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->depth_clamp : false;
946			if(depth_clamp != new_depth_clamp)
947			{
948				depth_clamp = new_depth_clamp;
949				set_clip();
950			}
951		}
952	}
953
954	virtual void STDMETHODCALLTYPE RSGetState(
955		ID3D11RasterizerState **out_rasterizer_state)
956	{
957		SYNCHRONIZED;
958		*out_rasterizer_state = rasterizer_state.ref();
959	}
960
961	void set_viewport()
962	{
963		// TODO: is depth correct? it seems D3D10/11 uses a [-1,1]x[-1,1]x[0,1] cube
964		pipe_viewport_state viewport;
965		float half_width = viewports[0].Width * 0.5f;
966		float half_height = viewports[0].Height * 0.5f;
967
968		viewport.scale[0] = half_width;
969		viewport.scale[1] = -half_height;
970		viewport.scale[2] = (viewports[0].MaxDepth - viewports[0].MinDepth);
971		viewport.scale[3] = 1.0f;
972		viewport.translate[0] = half_width + viewports[0].TopLeftX;
973		viewport.translate[1] = half_height + viewports[0].TopLeftY;
974		viewport.translate[2] = viewports[0].MinDepth;
975		viewport.translate[3] = 1.0f;
976		pipe->set_viewport_state(pipe, &viewport);
977	}
978
979	virtual void STDMETHODCALLTYPE RSSetViewports(
980		unsigned count,
981		const D3D11_VIEWPORT *new_viewports)
982	{
983		SYNCHRONIZED;
984		if(count)
985		{
986			if(memcmp(&viewports[0], &new_viewports[0], sizeof(viewports[0])))
987			{
988				viewports[0] = new_viewports[0];
989				set_viewport();
990			}
991			for(unsigned i = 1; i < count; ++i)
992				viewports[i] = new_viewports[i];
993		}
994		else if(num_viewports)
995		{
996			// TODO: what should we do here?
997			memset(&viewports[0], 0, sizeof(viewports[0]));
998			set_viewport();
999		}
1000		num_viewports = count;
1001	}
1002
1003	virtual void STDMETHODCALLTYPE RSGetViewports(
1004		unsigned *out_count,
1005		D3D11_VIEWPORT *out_viewports)
1006	{
1007		SYNCHRONIZED;
1008		if(out_viewports)
1009		{
1010			unsigned i;
1011			for(i = 0; i < std::min(*out_count, num_viewports); ++i)
1012				out_viewports[i] = viewports[i];
1013
1014			memset(out_viewports + i, 0, (*out_count - i) * sizeof(D3D11_VIEWPORT));
1015		}
1016
1017		*out_count = num_viewports;
1018	}
1019
1020	void set_scissor()
1021	{
1022		pipe_scissor_state scissor;
1023		scissor.minx = scissor_rects[0].left;
1024		scissor.miny = scissor_rects[0].top;
1025		scissor.maxx = scissor_rects[0].right;
1026		scissor.maxy = scissor_rects[0].bottom;
1027		pipe->set_scissor_state(pipe, &scissor);
1028	}
1029
1030	virtual void STDMETHODCALLTYPE RSSetScissorRects(
1031		unsigned count,
1032		const D3D11_RECT *new_rects)
1033	{
1034		SYNCHRONIZED;
1035		if(count)
1036		{
1037			if(memcmp(&scissor_rects[0], &new_rects[0], sizeof(scissor_rects[0])))
1038			{
1039				scissor_rects[0] = new_rects[0];
1040				set_scissor();
1041			}
1042			for(unsigned i = 1; i < count; ++i)
1043				scissor_rects[i] = new_rects[i];
1044		}
1045		else if(num_scissor_rects)
1046		{
1047			// TODO: what should we do here?
1048			memset(&scissor_rects[0], 0, sizeof(scissor_rects[0]));
1049			set_scissor();
1050		}
1051
1052		num_scissor_rects = count;
1053	}
1054
1055	virtual void STDMETHODCALLTYPE RSGetScissorRects(
1056		unsigned *out_count,
1057		D3D11_RECT *out_rects)
1058	{
1059		SYNCHRONIZED;
1060		if(out_rects)
1061		{
1062			unsigned i;
1063			for(i = 0; i < std::min(*out_count, num_scissor_rects); ++i)
1064				out_rects[i] = scissor_rects[i];
1065
1066			memset(out_rects + i, 0, (*out_count - i) * sizeof(D3D11_RECT));
1067		}
1068
1069		*out_count = num_scissor_rects;
1070	}
1071
1072	virtual void STDMETHODCALLTYPE OMSetBlendState(
1073		ID3D11BlendState *new_blend_state,
1074		const float new_blend_factor[4],
1075		unsigned new_sample_mask)
1076	{
1077		SYNCHRONIZED;
1078		float white[4] = {1.0f, 1.0f, 1.0f, 1.0f};
1079
1080		if(blend_state.p != new_blend_state)
1081		{
1082			pipe->bind_blend_state(pipe, new_blend_state ? ((GalliumD3D11BlendState*)new_blend_state)->object : default_blend);
1083			blend_state = new_blend_state;
1084		}
1085
1086		// Windows D3D11 does this, even though it's apparently undocumented
1087		if(!new_blend_factor)
1088			new_blend_factor = white;
1089
1090		if(memcmp(blend_color, new_blend_factor, sizeof(blend_color)))
1091		{
1092			pipe->set_blend_color(pipe, (struct pipe_blend_color*)new_blend_factor);
1093			memcpy(blend_color, new_blend_factor, sizeof(blend_color));
1094		}
1095
1096		if(sample_mask != new_sample_mask)
1097		{
1098			pipe->set_sample_mask(pipe, new_sample_mask);
1099			sample_mask = new_sample_mask;
1100		}
1101	}
1102
1103	virtual void STDMETHODCALLTYPE OMGetBlendState(
1104		ID3D11BlendState **out_blend_state,
1105		float out_blend_factor[4],
1106		unsigned *out_sample_mask)
1107	{
1108		SYNCHRONIZED;
1109		if(out_blend_state)
1110			*out_blend_state = blend_state.ref();
1111		if(out_blend_factor)
1112			memcpy(out_blend_factor, blend_color, sizeof(blend_color));
1113		if(out_sample_mask)
1114			*out_sample_mask = sample_mask;
1115	}
1116
1117	void set_stencil_ref()
1118	{
1119		struct pipe_stencil_ref sref;
1120		sref.ref_value[0] = stencil_ref;
1121		sref.ref_value[1] = stencil_ref;
1122		pipe->set_stencil_ref(pipe, &sref);
1123	}
1124
1125	virtual void STDMETHODCALLTYPE OMSetDepthStencilState(
1126		ID3D11DepthStencilState *new_depth_stencil_state,
1127		unsigned new_stencil_ref)
1128	{
1129		SYNCHRONIZED;
1130		if(new_depth_stencil_state != depth_stencil_state.p)
1131		{
1132			pipe->bind_depth_stencil_alpha_state(pipe, new_depth_stencil_state ? ((GalliumD3D11DepthStencilState*)new_depth_stencil_state)->object : default_depth_stencil);
1133			depth_stencil_state = new_depth_stencil_state;
1134		}
1135
1136		if(new_stencil_ref != stencil_ref)
1137		{
1138			stencil_ref = new_stencil_ref;
1139			set_stencil_ref();
1140		}
1141	}
1142
1143	virtual void STDMETHODCALLTYPE OMGetDepthStencilState(
1144		ID3D11DepthStencilState **out_depth_stencil_state,
1145		unsigned *out_stencil_ref)
1146	{
1147		SYNCHRONIZED;
1148		if(*out_depth_stencil_state)
1149			*out_depth_stencil_state = depth_stencil_state.ref();
1150		if(out_stencil_ref)
1151			*out_stencil_ref = stencil_ref;
1152	}
1153
1154	void set_framebuffer()
1155	{
1156		struct pipe_framebuffer_state fb;
1157		memset(&fb, 0, sizeof(fb));
1158		if(depth_stencil_view)
1159		{
1160			struct pipe_surface* surf = ((GalliumD3D11DepthStencilView*)depth_stencil_view.p)->object;
1161			fb.zsbuf = surf;
1162			if(surf->width > fb.width)
1163				fb.width = surf->width;
1164			if(surf->height > fb.height)
1165				fb.height = surf->height;
1166		}
1167		fb.nr_cbufs = num_render_target_views;
1168		unsigned i;
1169		for(i = 0; i < num_render_target_views; ++i)
1170		{
1171			if(render_target_views[i])
1172			{
1173				struct pipe_surface* surf = ((GalliumD3D11RenderTargetView*)render_target_views[i].p)->object;
1174				fb.cbufs[i] = surf;
1175				if(surf->width > fb.width)
1176					fb.width = surf->width;
1177				if(surf->height > fb.height)
1178					fb.height = surf->height;
1179			}
1180		}
1181
1182		pipe->set_framebuffer_state(pipe, &fb);
1183	}
1184
1185	/* TODO: the docs say that we should unbind conflicting resources (e.g. those bound for read while we are binding them for write too), but we aren't.
1186	 * Hopefully nobody relies on this happening
1187	 */
1188
1189	virtual void STDMETHODCALLTYPE OMSetRenderTargets(
1190		unsigned count,
1191		ID3D11RenderTargetView *const *new_render_target_views,
1192		ID3D11DepthStencilView  *new_depth_stencil_view)
1193	{
1194		SYNCHRONIZED;
1195		if(!new_render_target_views)
1196			count = 0;
1197		if(count == num_render_target_views)
1198		{
1199			for(unsigned i = 0; i < count; ++i)
1200			{
1201				if(new_render_target_views[i] != render_target_views[i].p)
1202					goto changed;
1203			}
1204			return;
1205		}
1206changed:
1207		depth_stencil_view = new_depth_stencil_view;
1208		unsigned i;
1209		for(i = 0; i < count; ++i)
1210		{
1211			render_target_views[i] = new_render_target_views[i];
1212#if API >= 11
1213			om_unordered_access_views[i] = (ID3D11UnorderedAccessView*)NULL;
1214#endif
1215		}
1216		for(; i < num_render_target_views; ++i)
1217			render_target_views[i] = (ID3D11RenderTargetView*)NULL;
1218		num_render_target_views = count;
1219		set_framebuffer();
1220	}
1221
1222	virtual void STDMETHODCALLTYPE OMGetRenderTargets(
1223		unsigned count,
1224		ID3D11RenderTargetView **out_render_target_views,
1225		ID3D11DepthStencilView  **out_depth_stencil_view)
1226	{
1227		SYNCHRONIZED;
1228		if(out_render_target_views)
1229		{
1230			unsigned i;
1231			for(i = 0; i < std::min(num_render_target_views, count); ++i)
1232				out_render_target_views[i] = render_target_views[i].ref();
1233
1234			for(; i < count; ++i)
1235				out_render_target_views[i] = 0;
1236		}
1237
1238		if(out_depth_stencil_view)
1239			*out_depth_stencil_view = depth_stencil_view.ref();
1240	}
1241
1242#if API >= 11
1243	/* TODO: what is this supposed to do _exactly_? are we doing the right thing? */
1244	virtual void STDMETHODCALLTYPE OMSetRenderTargetsAndUnorderedAccessViews(
1245		unsigned rtv_count,
1246		ID3D11RenderTargetView *const *new_render_target_views,
1247		ID3D11DepthStencilView  *new_depth_stencil_view,
1248		unsigned uav_start,
1249		unsigned uav_count,
1250		ID3D11UnorderedAccessView *const *new_unordered_access_views,
1251		const unsigned *new_uav_initial_counts)
1252	{
1253		SYNCHRONIZED;
1254		if(rtv_count != D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL)
1255			OMSetRenderTargets(rtv_count, new_render_target_views, new_depth_stencil_view);
1256
1257		if(uav_count != D3D11_KEEP_UNORDERED_ACCESS_VIEWS)
1258		{
1259			for(unsigned i = 0; i < uav_count; ++i)
1260			{
1261				om_unordered_access_views[uav_start + i] = new_unordered_access_views[i];
1262				render_target_views[uav_start + i] = (ID3D11RenderTargetView*)0;
1263			}
1264		}
1265	}
1266
1267	virtual void STDMETHODCALLTYPE OMGetRenderTargetsAndUnorderedAccessViews(
1268		unsigned rtv_count,
1269		ID3D11RenderTargetView **out_render_target_views,
1270		ID3D11DepthStencilView  **out_depth_stencil_view,
1271		unsigned uav_start,
1272		unsigned uav_count,
1273		ID3D11UnorderedAccessView **out_unordered_access_views)
1274	{
1275		SYNCHRONIZED;
1276		if(out_render_target_views)
1277			OMGetRenderTargets(rtv_count, out_render_target_views, out_depth_stencil_view);
1278
1279		if(out_unordered_access_views)
1280		{
1281			for(unsigned i = 0; i < uav_count; ++i)
1282				out_unordered_access_views[i] = om_unordered_access_views[uav_start + i].ref();
1283		}
1284	}
1285#endif
1286
1287	virtual void STDMETHODCALLTYPE SOSetTargets(
1288		unsigned count,
1289		ID3D11Buffer *const *new_so_targets,
1290		const unsigned *new_offsets)
1291	{
1292		SYNCHRONIZED;
1293		unsigned i;
1294		if(!new_so_targets)
1295			count = 0;
1296		bool changed = false;
1297		for(i = 0; i < count; ++i)
1298		{
1299			ID3D11Buffer* buffer = new_so_targets[i];
1300			if(buffer != so_targets[i].p || new_offsets[i] != so_offsets[i])
1301			{
1302				so_buffers[i] = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
1303				so_targets[i] = buffer;
1304				so_offsets[i] = new_offsets[i];
1305				changed = true;
1306			}
1307		}
1308		for(; i < D3D11_SO_BUFFER_SLOT_COUNT; ++i)
1309		{
1310			if(so_targets[i].p || so_offsets[i])
1311			{
1312				changed = true;
1313				so_targets[i] = (ID3D11Buffer*)0;
1314				so_offsets[i] = 0;
1315			}
1316		}
1317		num_so_targets = count;
1318
1319		if(changed && caps.so)
1320			pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1321	}
1322
1323	virtual void STDMETHODCALLTYPE SOGetTargets(
1324		unsigned count,
1325		ID3D11Buffer **out_so_targets
1326#if API < 11
1327		, UINT *out_offsets
1328#endif
1329		)
1330	{
1331		SYNCHRONIZED;
1332		for(unsigned i = 0; i < count; ++i)
1333		{
1334			out_so_targets[i] = so_targets[i].ref();
1335#if API < 11
1336			out_offsets[i] = so_offsets[i];
1337#endif
1338		}
1339	}
1340
1341	virtual void STDMETHODCALLTYPE Begin(
1342		ID3D11Asynchronous *async)
1343	{
1344		SYNCHRONIZED;
1345		if(caps.queries)
1346			pipe->begin_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query);
1347	}
1348
1349	virtual void STDMETHODCALLTYPE End(
1350		ID3D11Asynchronous *async)
1351	{
1352		SYNCHRONIZED;
1353		if(caps.queries)
1354			pipe->end_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query);
1355	}
1356
1357	virtual HRESULT STDMETHODCALLTYPE GetData(
1358		ID3D11Asynchronous *iasync,
1359		void *out_data,
1360		unsigned data_size,
1361		unsigned get_data_flags)
1362	{
1363		SYNCHRONIZED;
1364		if(!caps.queries)
1365			return E_NOTIMPL;
1366
1367		GalliumD3D11Asynchronous<>* async = (GalliumD3D11Asynchronous<>*)iasync;
1368		void* tmp_data = alloca(async->data_size);
1369		boolean ret = pipe->get_query_result(pipe, async->query, !(get_data_flags & D3D11_ASYNC_GETDATA_DONOTFLUSH), tmp_data);
1370		if(out_data)
1371			memcpy(out_data, tmp_data, std::min(async->data_size, data_size));
1372		return ret ? S_OK : S_FALSE;
1373	}
1374
1375	void set_render_condition()
1376	{
1377		if(caps.render_condition)
1378		{
1379			if(!render_predicate)
1380				pipe->render_condition(pipe, 0, 0);
1381			else
1382			{
1383				GalliumD3D11Predicate* predicate = (GalliumD3D11Predicate*)render_predicate.p;
1384				if(!render_predicate_value && predicate->desc.Query == D3D11_QUERY_OCCLUSION_PREDICATE)
1385				{
1386					unsigned mode = (predicate->desc.MiscFlags & D3D11_QUERY_MISC_PREDICATEHINT) ? PIPE_RENDER_COND_NO_WAIT : PIPE_RENDER_COND_WAIT;
1387					pipe->render_condition(pipe, predicate->query, mode);
1388				}
1389				else
1390				{
1391					/* TODO: add inverted predication to Gallium*/
1392					pipe->render_condition(pipe, 0, 0);
1393				}
1394			}
1395		}
1396	}
1397
1398	virtual void STDMETHODCALLTYPE SetPredication(
1399		ID3D11Predicate *new_predicate,
1400		BOOL new_predicate_value)
1401	{
1402		SYNCHRONIZED;
1403		if(render_predicate.p != new_predicate || render_predicate_value != new_predicate_value)
1404		{
1405			render_predicate = new_predicate;
1406			render_predicate_value = new_predicate_value;
1407			set_render_condition();
1408		}
1409	}
1410
1411	virtual void STDMETHODCALLTYPE GetPredication(
1412		ID3D11Predicate **out_predicate,
1413		BOOL *out_predicate_value)
1414	{
1415		SYNCHRONIZED;
1416		if(out_predicate)
1417			*out_predicate = render_predicate.ref();
1418		if(out_predicate_value)
1419			*out_predicate_value = render_predicate_value;
1420	}
1421
1422	static unsigned d3d11_subresource_to_level(struct pipe_resource* resource, unsigned subresource)
1423	{
1424		if(subresource <= resource->last_level)
1425		{
1426			return subresource;
1427		}
1428		else
1429		{
1430			unsigned levels = resource->last_level + 1;
1431			return subresource % levels;
1432		}
1433	}
1434
1435	static unsigned d3d11_subresource_to_face(struct pipe_resource* resource, unsigned subresource)
1436	{
1437		if(subresource <= resource->last_level)
1438		{
1439			return 0;
1440		}
1441		else
1442		{
1443			unsigned levels = resource->last_level + 1;
1444			return subresource / levels;
1445		}
1446	}
1447
1448
1449	/* TODO: deferred contexts will need a different implementation of this,
1450	 * because we can't put the transfer info into the resource itself.
1451	 * Also, there are very different restrictions, for obvious reasons.
1452	 */
1453	virtual HRESULT STDMETHODCALLTYPE Map(
1454		ID3D11Resource *iresource,
1455		unsigned subresource,
1456		D3D11_MAP map_type,
1457		unsigned map_flags,
1458		D3D11_MAPPED_SUBRESOURCE *mapped_resource)
1459	{
1460		SYNCHRONIZED;
1461		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1462		if(resource->transfers.count(subresource))
1463			return E_FAIL;
1464		unsigned level = d3d11_subresource_to_level(resource->resource, subresource);
1465		unsigned face = d3d11_subresource_to_face(resource->resource, subresource);
1466		pipe_box box = d3d11_to_pipe_box(resource->resource, level, 0);
1467		/* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */
1468		unsigned usage = 0;
1469		if(map_type == D3D11_MAP_READ)
1470			usage = PIPE_TRANSFER_READ;
1471		else if(map_type == D3D11_MAP_WRITE)
1472			usage = PIPE_TRANSFER_WRITE;
1473		else if(map_type == D3D11_MAP_READ_WRITE)
1474			usage = PIPE_TRANSFER_READ_WRITE;
1475		else if(map_type == D3D11_MAP_WRITE_DISCARD)
1476			usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD;
1477		else if(map_type == D3D11_MAP_WRITE_NO_OVERWRITE)
1478			usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_NOOVERWRITE;
1479		else
1480			return E_INVALIDARG;
1481		if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT)
1482			usage |= PIPE_TRANSFER_DONTBLOCK;
1483		struct pipe_transfer* transfer = pipe->get_transfer(pipe, resource->resource, level, usage, &box);
1484		if(!transfer) {
1485			if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT)
1486				return DXGI_ERROR_WAS_STILL_DRAWING;
1487			else
1488				return E_FAIL;
1489		}
1490		resource->transfers[subresource] = transfer;
1491		mapped_resource->pData = pipe->transfer_map(pipe, transfer);
1492		mapped_resource->RowPitch = transfer->stride;
1493		mapped_resource->DepthPitch = transfer->layer_stride;
1494		return S_OK;
1495	}
1496
1497	virtual void STDMETHODCALLTYPE Unmap(
1498		ID3D11Resource *iresource,
1499		unsigned subresource)
1500	{
1501		SYNCHRONIZED;
1502		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1503		std::unordered_map<unsigned, pipe_transfer*>::iterator i = resource->transfers.find(subresource);
1504		if(i != resource->transfers.end())
1505		{
1506			pipe->transfer_unmap(pipe, i->second);
1507			pipe->transfer_destroy(pipe, i->second);
1508			resource->transfers.erase(i);
1509		}
1510	}
1511
1512	virtual void STDMETHODCALLTYPE CopySubresourceRegion(
1513		ID3D11Resource *dst_resource,
1514		unsigned dst_subresource,
1515		unsigned dst_x,
1516		unsigned dst_y,
1517		unsigned dst_z,
1518		ID3D11Resource *src_resource,
1519		unsigned src_subresource,
1520		const D3D11_BOX *src_box)
1521	{
1522		SYNCHRONIZED;
1523		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1524		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1525		unsigned dst_level = d3d11_subresource_to_level(dst->resource, dst_subresource);
1526		unsigned dst_face = d3d11_subresource_to_face(dst->resource, dst_subresource);
1527		unsigned src_level = d3d11_subresource_to_level(src->resource, src_subresource);
1528		unsigned src_face = d3d11_subresource_to_face(src->resource, src_subresource);
1529		/* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */
1530		pipe_box box = d3d11_to_pipe_box(src->resource, src_level, src_box);
1531		{
1532			pipe->resource_copy_region(pipe,
1533				dst->resource, dst_level, dst_x, dst_y, dst_z,
1534				src->resource, src_level, &box);
1535		}
1536	}
1537
1538	virtual void STDMETHODCALLTYPE CopyResource(
1539		ID3D11Resource *dst_resource,
1540		ID3D11Resource *src_resource)
1541	{
1542		SYNCHRONIZED;
1543		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1544		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1545		unsigned level;
1546		for(level = 0; level <= dst->resource->last_level; ++level)
1547		{
1548		        unsigned layers = 1;
1549			pipe_box box;
1550			if (dst->resource->target == PIPE_TEXTURE_CUBE)
1551				layers = 6;
1552			else if (dst->resource->target == PIPE_TEXTURE_3D)
1553				layers = u_minify(dst->resource->depth0, level);
1554			/* else layers = dst->resource->array_size; */
1555			box.x = box.y = box.z = 0;
1556			box.width = u_minify(dst->resource->width0, level);
1557			box.height = u_minify(dst->resource->height0, level);
1558			box.depth = layers;
1559			pipe->resource_copy_region(pipe,
1560						   dst->resource, level, 0, 0, 0,
1561						   src->resource, level, &box);
1562		}
1563	}
1564
1565	virtual void STDMETHODCALLTYPE UpdateSubresource(
1566		ID3D11Resource *dst_resource,
1567		unsigned dst_subresource,
1568		const D3D11_BOX *pDstBox,
1569		const void *pSrcData,
1570		unsigned src_row_pitch,
1571		unsigned src_depth_pitch)
1572	{
1573		SYNCHRONIZED;
1574		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1575		unsigned dst_level = d3d11_subresource_to_level(dst->resource, dst_subresource);
1576		/* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */
1577		pipe_box box = d3d11_to_pipe_box(dst->resource, dst_level, pDstBox);
1578		pipe->transfer_inline_write(pipe, dst->resource, dst_level, PIPE_TRANSFER_WRITE, &box, pSrcData, src_row_pitch, src_depth_pitch);
1579	}
1580
1581#if API >= 11
1582	virtual void STDMETHODCALLTYPE CopyStructureCount(
1583		ID3D11Buffer *dst_buffer,
1584		unsigned dst_aligned_byte_offset,
1585		ID3D11UnorderedAccessView *src_view)
1586	{
1587		SYNCHRONIZED;
1588	}
1589#endif
1590
1591	virtual void STDMETHODCALLTYPE ClearRenderTargetView(
1592		ID3D11RenderTargetView *render_target_view,
1593		const float color[4])
1594	{
1595		SYNCHRONIZED;
1596		GalliumD3D11RenderTargetView* view = ((GalliumD3D11RenderTargetView*)render_target_view);
1597		pipe->clear_render_target(pipe, view->object, color, 0, 0, view->object->width, view->object->height);
1598	}
1599
1600	virtual void STDMETHODCALLTYPE ClearDepthStencilView(
1601		ID3D11DepthStencilView  *depth_stencil_view,
1602		unsigned clear_flags,
1603		float depth,
1604		UINT8 stencil)
1605	{
1606		SYNCHRONIZED;
1607		GalliumD3D11DepthStencilView* view = ((GalliumD3D11DepthStencilView*)depth_stencil_view);
1608		unsigned flags = 0;
1609		if(clear_flags & D3D11_CLEAR_DEPTH)
1610			flags |= PIPE_CLEAR_DEPTH;
1611		if(clear_flags & D3D11_CLEAR_STENCIL)
1612			flags |= PIPE_CLEAR_STENCIL;
1613		pipe->clear_depth_stencil(pipe, view->object, flags, depth, stencil, 0, 0, view->object->width, view->object->height);
1614	}
1615
1616#if API >= 11
1617	virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewUint(
1618		ID3D11UnorderedAccessView *unordered_access_view,
1619		const unsigned values[4])
1620	{
1621		SYNCHRONIZED;
1622	}
1623
1624	virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewFloat(
1625			ID3D11UnorderedAccessView *unordered_access_view,
1626			const float values[4])
1627	{
1628		SYNCHRONIZED;
1629	}
1630#endif
1631
1632	void restore_gallium_state_blit_only()
1633	{
1634		pipe->bind_blend_state(pipe, blend_state.p ? blend_state.p->object : default_blend);
1635		pipe->bind_depth_stencil_alpha_state(pipe, depth_stencil_state.p ? depth_stencil_state.p->object : default_depth_stencil);
1636		pipe->bind_rasterizer_state(pipe, rasterizer_state.p ? rasterizer_state.p->object : default_rasterizer);
1637		pipe->bind_vertex_elements_state(pipe, input_layout.p ? input_layout.p->object : default_input_layout);
1638		pipe->bind_fs_state(pipe, shaders[D3D11_STAGE_PS].p ? shaders[D3D11_STAGE_PS].p->object : default_shaders[PIPE_SHADER_FRAGMENT]);
1639		pipe->bind_vs_state(pipe, shaders[D3D11_STAGE_VS].p ? shaders[D3D11_STAGE_VS].p->object : default_shaders[PIPE_SHADER_VERTEX]);
1640		if(caps.gs)
1641			pipe->bind_gs_state(pipe, shaders[D3D11_STAGE_GS].p ? shaders[D3D11_STAGE_GS].p->object : default_shaders[PIPE_SHADER_GEOMETRY]);
1642		set_framebuffer();
1643		set_viewport();
1644		set_clip();
1645		set_render_condition();
1646		// TODO: restore stream output
1647
1648		update_flags |= UPDATE_VERTEX_BUFFERS | (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_PS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_PS));
1649	}
1650
1651	virtual void STDMETHODCALLTYPE RestoreGalliumStateBlitOnly()
1652	{
1653		SYNCHRONIZED;
1654		restore_gallium_state_blit_only();
1655	}
1656
1657	virtual void STDMETHODCALLTYPE GenerateMips(
1658		ID3D11ShaderResourceView *shader_resource_view)
1659	{
1660		SYNCHRONIZED;
1661
1662		GalliumD3D11ShaderResourceView* view = (GalliumD3D11ShaderResourceView*)shader_resource_view;
1663		if(caps.gs)
1664			pipe->bind_gs_state(pipe, 0);
1665		if(caps.so)
1666			pipe->bind_stream_output_state(pipe, 0);
1667		if(pipe->render_condition)
1668			pipe->render_condition(pipe, 0, 0);
1669		util_gen_mipmap(gen_mipmap, view->object, 0, 0, view->object->texture->last_level, PIPE_TEX_FILTER_LINEAR);
1670		restore_gallium_state_blit_only();
1671	}
1672
1673	virtual void STDMETHODCALLTYPE RestoreGalliumState()
1674	{
1675		SYNCHRONIZED;
1676		restore_gallium_state_blit_only();
1677
1678		set_index_buffer();
1679		set_stencil_ref();
1680		pipe->set_blend_color(pipe, (struct pipe_blend_color*)blend_color);
1681		pipe->set_sample_mask(pipe, sample_mask);
1682
1683		for(unsigned s = 0; s < 3; ++s)
1684		{
1685			unsigned num = std::min(caps.constant_buffers[s], (unsigned)D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
1686			for(unsigned i = 0; i < num; ++i)
1687				pipe->set_constant_buffer(pipe, s, i, constant_buffers[s][i].p ? constant_buffers[s][i].p->resource : 0);
1688		}
1689
1690		if(caps.so)
1691			pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1692
1693		update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_VS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_VS));
1694		update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_GS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_GS));
1695
1696		set_scissor();
1697	}
1698
1699#if API >= 11
1700	/* TODO: hack SRVs or sampler states to handle this, or add to Gallium */
1701	virtual void STDMETHODCALLTYPE SetResourceMinLOD(
1702		ID3D11Resource *iresource,
1703		float min_lod)
1704	{
1705		SYNCHRONIZED;
1706		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1707		if(resource->min_lod != min_lod)
1708		{
1709			// TODO: actually do anything?
1710			resource->min_lod = min_lod;
1711		}
1712	}
1713
1714	virtual float STDMETHODCALLTYPE GetResourceMinLOD(
1715		ID3D11Resource *iresource)
1716	{
1717		SYNCHRONIZED;
1718		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1719		return resource->min_lod;
1720	}
1721#endif
1722
1723	virtual void STDMETHODCALLTYPE ResolveSubresource(
1724		ID3D11Resource *dst_resource,
1725		unsigned dst_subresource,
1726		ID3D11Resource *src_resource,
1727		unsigned src_subresource,
1728		DXGI_FORMAT format)
1729	{
1730		SYNCHRONIZED;
1731		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1732		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1733		struct pipe_resolve_info info;
1734
1735		info.dst.res = dst->resource;
1736		info.src.res = src->resource;
1737		info.dst.level = 0;
1738		info.dst.layer = d3d11_subresource_to_face(dst->resource, dst_subresource);
1739		info.src.layer = d3d11_subresource_to_face(src->resource, src_subresource);
1740
1741		info.src.x0 = 0;
1742		info.src.x1 = info.src.res->width0;
1743		info.src.y0 = 0;
1744		info.src.y1 = info.src.res->height0;
1745		info.dst.x0 = 0;
1746		info.dst.x1 = info.dst.res->width0;
1747		info.dst.y0 = 0;
1748		info.dst.y1 = info.dst.res->height0;
1749
1750		info.mask = PIPE_MASK_RGBA | PIPE_MASK_ZS;
1751
1752		pipe->resource_resolve(pipe, &info);
1753	}
1754
1755#if API >= 11
1756	virtual void STDMETHODCALLTYPE ExecuteCommandList(
1757		ID3D11CommandList *command_list,
1758		BOOL restore_context_state)
1759	{
1760		SYNCHRONIZED;
1761	}
1762
1763	virtual HRESULT STDMETHODCALLTYPE FinishCommandList(
1764		BOOL restore_deferred_context_state,
1765		ID3D11CommandList **out_command_list)
1766	{
1767		SYNCHRONIZED;
1768		return E_NOTIMPL;
1769	}
1770#endif
1771
1772	virtual void STDMETHODCALLTYPE ClearState(void)
1773	{
1774		/* we don't take a lock here because we would deadlock otherwise
1775		 * TODO: this is probably incorrect, because ClearState should likely be atomic.
1776		 * However, I can't think of any correct usage that would be affected by this
1777		 * being non-atomic, and making this atomic is quite expensive and complicates
1778		 * the code
1779		 */
1780
1781		// we qualify all calls so that we avoid virtual dispatch and might get them inlined
1782		// TODO: make sure all this gets inlined, which might require more compiler flags
1783		// TODO: optimize this
1784#if API >= 11
1785		GalliumD3D11DeviceContext::PSSetShader(0, 0, 0);
1786		GalliumD3D11DeviceContext::GSSetShader(0, 0, 0);
1787		GalliumD3D11DeviceContext::VSSetShader(0, 0, 0);
1788		GalliumD3D11DeviceContext::HSSetShader(0, 0, 0);
1789		GalliumD3D11DeviceContext::DSSetShader(0, 0, 0);
1790		GalliumD3D11DeviceContext::CSSetShader(0, 0, 0);
1791#else
1792		GalliumD3D11DeviceContext::PSSetShader(0);
1793		GalliumD3D11DeviceContext::GSSetShader(0);
1794		GalliumD3D11DeviceContext::VSSetShader(0);
1795#endif
1796
1797		GalliumD3D11DeviceContext::IASetInputLayout(0);
1798		GalliumD3D11DeviceContext::IASetIndexBuffer(0, DXGI_FORMAT_UNKNOWN, 0);
1799		GalliumD3D11DeviceContext::RSSetState(0);
1800		GalliumD3D11DeviceContext::OMSetDepthStencilState(0, 0);
1801		GalliumD3D11DeviceContext::OMSetBlendState(0, (float*)zero_data, ~0);
1802		GalliumD3D11DeviceContext::SetPredication(0, 0);
1803		GalliumD3D11DeviceContext::IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_UNDEFINED);
1804
1805		GalliumD3D11DeviceContext::PSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1806		GalliumD3D11DeviceContext::GSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1807		GalliumD3D11DeviceContext::VSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1808#if API >= 11
1809		GalliumD3D11DeviceContext::HSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1810		GalliumD3D11DeviceContext::DSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1811		GalliumD3D11DeviceContext::CSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1812#endif
1813
1814		GalliumD3D11DeviceContext::IASetVertexBuffers(0, num_vertex_buffers, (ID3D11Buffer**)zero_data, (unsigned*)zero_data, (unsigned*)zero_data);
1815#if API >= 11
1816		GalliumD3D11DeviceContext::OMSetRenderTargetsAndUnorderedAccessViews(0, 0, 0 , 0, 0, 0, 0);
1817#else
1818		GalliumD3D11DeviceContext::OMSetRenderTargets(0, 0, 0 );
1819#endif
1820		GalliumD3D11DeviceContext::SOSetTargets(0, 0, 0);
1821
1822		GalliumD3D11DeviceContext::PSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11ShaderResourceView**)zero_data);
1823		GalliumD3D11DeviceContext::GSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11ShaderResourceView**)zero_data);
1824		GalliumD3D11DeviceContext::VSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11ShaderResourceView**)zero_data);
1825#if API >= 11
1826		GalliumD3D11DeviceContext::HSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11ShaderResourceView**)zero_data);
1827		GalliumD3D11DeviceContext::DSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11ShaderResourceView**)zero_data);
1828		GalliumD3D11DeviceContext::CSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11ShaderResourceView**)zero_data);
1829#endif
1830
1831		GalliumD3D11DeviceContext::PSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11SamplerState**)zero_data);
1832		GalliumD3D11DeviceContext::GSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11SamplerState**)zero_data);
1833		GalliumD3D11DeviceContext::VSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11SamplerState**)zero_data);
1834#if API >= 11
1835		GalliumD3D11DeviceContext::HSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11SamplerState**)zero_data);
1836		GalliumD3D11DeviceContext::DSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11SamplerState**)zero_data);
1837		GalliumD3D11DeviceContext::CSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11SamplerState**)zero_data);
1838#endif
1839
1840		GalliumD3D11DeviceContext::RSSetViewports(0, 0);
1841		GalliumD3D11DeviceContext::RSSetScissorRects(0, 0);
1842	}
1843
1844	virtual void STDMETHODCALLTYPE Flush(void)
1845	{
1846		SYNCHRONIZED;
1847                pipe->flush(pipe, 0);
1848	}
1849
1850	/* In Direct3D 10, if the reference count of an object drops to 0, it is automatically
1851	 * cleanly unbound from the pipeline.
1852	 * In Direct3D 11, the pipeline holds a reference.
1853	 *
1854	 * Note that instead of always scanning the pipeline on destruction, we could
1855	 * maintain the internal reference count on DirectX 10 and use it to check if an
1856	 * object is still bound.
1857	 * Presumably, on average, scanning is faster if the application is well written.
1858	 */
1859#if API < 11
1860#define IMPLEMENT_SIMPLE_UNBIND(name, member, gallium, def) \
1861	void Unbind##name(ID3D11##name* state) \
1862	{ \
1863		SYNCHRONIZED; \
1864		if((void*)state == (void*)member.p) \
1865		{ \
1866			member.p = 0; \
1867			pipe->bind_##gallium##_state(pipe, default_##def); \
1868		} \
1869	}
1870	IMPLEMENT_SIMPLE_UNBIND(BlendState, blend_state, blend, blend)
1871	IMPLEMENT_SIMPLE_UNBIND(RasterizerState, rasterizer_state, rasterizer, rasterizer)
1872	IMPLEMENT_SIMPLE_UNBIND(DepthStencilState, depth_stencil_state, depth_stencil_alpha, depth_stencil)
1873	IMPLEMENT_SIMPLE_UNBIND(InputLayout, input_layout, vertex_elements, input_layout)
1874	IMPLEMENT_SIMPLE_UNBIND(PixelShader, shaders[D3D11_STAGE_PS], fs, shaders[D3D11_STAGE_PS])
1875	IMPLEMENT_SIMPLE_UNBIND(VertexShader, shaders[D3D11_STAGE_VS], vs, shaders[D3D11_STAGE_VS])
1876	IMPLEMENT_SIMPLE_UNBIND(GeometryShader, shaders[D3D11_STAGE_GS], gs, shaders[D3D11_STAGE_GS])
1877
1878	void UnbindPredicate(ID3D11Predicate* predicate)
1879	{
1880		SYNCHRONIZED;
1881		if(predicate == render_predicate)
1882		{
1883			render_predicate.p = NULL;
1884			render_predicate_value = 0;
1885			pipe->render_condition(pipe, 0, 0);
1886		}
1887	}
1888
1889	void UnbindSamplerState(ID3D11SamplerState* state)
1890	{
1891		SYNCHRONIZED;
1892		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1893		{
1894			for(unsigned i = 0; i < num_samplers[s]; ++i)
1895			{
1896				if(samplers[s][i] == state)
1897				{
1898					samplers[s][i].p = NULL;
1899					sampler_csos[s].v[i] = NULL;
1900					update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s));
1901				}
1902			}
1903		}
1904	}
1905
1906	void UnbindBuffer(ID3D11Buffer* buffer)
1907	{
1908		SYNCHRONIZED;
1909		if(buffer == index_buffer)
1910		{
1911			index_buffer.p = 0;
1912			index_format = DXGI_FORMAT_UNKNOWN;
1913			index_offset = 0;
1914			struct pipe_index_buffer ib;
1915			memset(&ib, 0, sizeof(ib));
1916			pipe->set_index_buffer(pipe, &ib);
1917		}
1918
1919		for(unsigned i = 0; i < num_vertex_buffers; ++i)
1920		{
1921			if(buffer == input_buffers[i])
1922			{
1923				input_buffers[i].p = 0;
1924				memset(&vertex_buffers[num_vertex_buffers], 0, sizeof(vertex_buffers[num_vertex_buffers]));
1925				update_flags |= UPDATE_VERTEX_BUFFERS;
1926			}
1927		}
1928
1929		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1930		{
1931			for(unsigned i = 0; i < sizeof(constant_buffers) / sizeof(constant_buffers[0]); ++i)
1932			{
1933				if(constant_buffers[s][i] == buffer)
1934				{
1935					constant_buffers[s][i] = (ID3D10Buffer*)NULL;
1936					pipe->set_constant_buffer(pipe, s, i, NULL);
1937				}
1938			}
1939		}
1940	}
1941
1942	void UnbindDepthStencilView(ID3D11DepthStencilView * view)
1943	{
1944		SYNCHRONIZED;
1945		if(view == depth_stencil_view)
1946		{
1947			depth_stencil_view.p = NULL;
1948			set_framebuffer();
1949		}
1950	}
1951
1952	void UnbindRenderTargetView(ID3D11RenderTargetView* view)
1953	{
1954		SYNCHRONIZED;
1955		bool any_bound = false;
1956		for(unsigned i = 0; i < num_render_target_views; ++i)
1957		{
1958			if(render_target_views[i] == view)
1959			{
1960				render_target_views[i].p = NULL;
1961				any_bound = true;
1962			}
1963		}
1964		if(any_bound)
1965			set_framebuffer();
1966	}
1967
1968	void UnbindShaderResourceView(ID3D11ShaderResourceView* view)
1969	{
1970		SYNCHRONIZED;
1971		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1972		{
1973			for(unsigned i = 0; i < num_shader_resource_views[s]; ++i)
1974			{
1975				if(shader_resource_views[s][i] == view)
1976				{
1977					shader_resource_views[s][i].p = NULL;
1978					sampler_views[s][i] = NULL;
1979					update_flags |= (1 << (UPDATE_VIEWS_SHIFT + s));
1980				}
1981			}
1982		}
1983	}
1984#endif
1985
1986#undef SYNCHRONIZED
1987};
1988
1989#if API >= 11
1990/* This approach serves two purposes.
1991 * First, we don't want to do an atomic operation to manipulate the reference
1992 * count every time something is bound/unbound to the pipeline, since they are
1993 * expensive.
1994 * Fortunately, the immediate context can only be used by a single thread, so
1995 * we don't have to use them, as long as a separate reference count is used
1996 * (see dual_refcnt_t).
1997 *
1998 * Second, we want to avoid the Device -> DeviceContext -> bound DeviceChild -> Device
1999 * garbage cycle.
2000 * To avoid it, DeviceChild doesn't hold a reference to Device as usual, but adds
2001 * one for each external reference count, while internal nonatomic_add_ref doesn't
2002 * add any.
2003 *
 * Note that ideally we would like to eliminate the non-atomic op too, but this is more
2005 * complicated, since we would either need to use garbage collection and give up
2006 * deterministic destruction (especially bad for large textures), or scan the whole
2007 * pipeline state every time the reference count of object drops to 0, which risks
2008 * pathological slowdowns.
2009 *
2010 * Since this microoptimization should matter relatively little, let's avoid it for now.
2011 *
 * Note that deferred contexts don't use this, since as a whole, they must be thread-safe.
2013 * Eliminating the atomic ops for deferred contexts seems substantially harder.
2014 * This might be a problem if they are used in a one-shot multithreaded rendering
2015 * fashion, where SMP cacheline bouncing on the reference count may be visible.
2016 *
2017 * The idea would be to attach a structure of reference counts indexed by deferred
2018 * context id to each object. Ideally, this should be organized like ext2 block pointers.
2019 *
2020 * Every deferred context would get a reference count in its own cacheline.
2021 * The external count is protected by a lock bit, and there is also a "lock bit" in each
2022 * internal count.
2023 *
2024 * When the external count has to be dropped to 0, the lock bit is taken and all internal
2025 * reference counts are scanned, taking a count of them. A flag would also be set on them.
2026 * Deferred context manipulation would notice the flag, and update the count.
2027 * Once the count goes to zero, the object is freed.
2028 *
 * The problem with this is that if the external reference count ping-pongs between
2030 * zero and non-zero, the scans will take a lot of time.
2031 *
2032 * The idea to solve this is to compute the scans in a binary-tree like fashion, where
2033 * each binary tree node would have a "determined bit", which would be invalidated
2034 * by manipulations.
2035 *
2036 * However, all this complexity might actually be a loss in most cases, so let's just
2037 * stick to a single atomic refcnt for now.
2038 *
2039 * Also, we don't even support deferred contexts yet, so this can wait.
2040 */
2041struct nonatomic_device_child_ptr_traits
2042{
2043	static void add_ref(void* p)
2044	{
2045		if(p)
2046			((GalliumD3D11DeviceChild<>*)p)->nonatomic_add_ref();
2047	}
2048
2049	static void release(void* p)
2050	{
2051		if(p)
2052			((GalliumD3D11DeviceChild<>*)p)->nonatomic_release();
2053	}
2054};
2055
2056struct GalliumD3D11ImmediateDeviceContext
2057	: public GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>
2058{
2059	GalliumD3D11ImmediateDeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, unsigned context_flags = 0)
2060	: GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>(device, pipe, context_flags)
2061	{
2062		// not necessary, but tests that the API at least basically works
2063		ClearState();
2064	}
2065
2066	/* we do this since otherwise we would have a garbage cycle between this and the device */
2067	virtual ULONG STDMETHODCALLTYPE AddRef()
2068	{
2069		return this->device->AddRef();
2070	}
2071
2072	virtual ULONG STDMETHODCALLTYPE Release()
2073	{
2074		return this->device->Release();
2075	}
2076
2077	virtual D3D11_DEVICE_CONTEXT_TYPE STDMETHODCALLTYPE GetType()
2078	{
2079		return D3D11_DEVICE_CONTEXT_IMMEDIATE;
2080	}
2081};
2082
2083static ID3D11DeviceContext* GalliumD3D11ImmediateDeviceContext_Create(GalliumD3D11Screen* device, struct pipe_context* pipe, bool owns_pipe)
2084{
2085	return new GalliumD3D11ImmediateDeviceContext(device, pipe, owns_pipe);
2086}
2087
2088static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumState(ID3D11DeviceContext* context)
2089{
2090	((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumState();
2091}
2092
2093static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumStateBlitOnly(ID3D11DeviceContext* context)
2094{
2095	((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumStateBlitOnly();
2096}
2097
2098static void GalliumD3D11ImmediateDeviceContext_Destroy(ID3D11DeviceContext* context)
2099{
2100	delete (GalliumD3D11ImmediateDeviceContext*)context;
2101}
2102#endif
2103