d3d11_context.h revision e27d72d6c3ca3512bfea524b8a7b007960841057
1/**************************************************************************
2 *
3 * Copyright 2010 Luca Barbieri
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
/* used to unbind things, we need 128 due to resources
 * NOTE(review): no use of this array is visible in this part of the file; presumably it is passed
 * where a "pointer to nulls" is needed to clear bindings -- confirm against the callers. */
static const void* zero_data[128];

/* Layout of the update_flags dirty mask:
 *   bit (UPDATE_VIEWS_SHIFT + stage)    : the sampler views of that stage must be re-sent to the pipe
 *   bit (UPDATE_SAMPLERS_SHIFT + stage) : the sampler states of that stage must be re-sent to the pipe
 *   UPDATE_VERTEX_BUFFERS               : the vertex buffer array must be re-sent to the pipe
 */
#define UPDATE_VIEWS_SHIFT (D3D11_STAGES * 0)
#define UPDATE_SAMPLERS_SHIFT (D3D11_STAGES * 1)
#define UPDATE_VERTEX_BUFFERS (1 << (D3D11_STAGES * 2))
33
34#if API >= 11
35template<typename PtrTraits>
36struct GalliumD3D11DeviceContext :
37	public GalliumD3D11DeviceChild<ID3D11DeviceContext>
38{
39#else
40template<bool threadsafe>
41struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe>
42{
43	typedef simple_ptr_traits PtrTraits;
44	typedef GalliumD3D10Device GalliumD3D10DeviceContext;
45#endif
46
	// D3D objects currently bound through the *Set* methods, tracked with refcnt_ptr
	refcnt_ptr<GalliumD3D11Shader<>, PtrTraits> shaders[D3D11_STAGES];
	refcnt_ptr<GalliumD3D11InputLayout, PtrTraits> input_layout;
	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> index_buffer;
	refcnt_ptr<GalliumD3D11RasterizerState, PtrTraits> rasterizer_state;
	refcnt_ptr<GalliumD3D11DepthStencilState, PtrTraits> depth_stencil_state;
	refcnt_ptr<GalliumD3D11BlendState, PtrTraits> blend_state;
	refcnt_ptr<GalliumD3D11DepthStencilView, PtrTraits> depth_stencil_view;
	refcnt_ptr<GalliumD3D11Predicate, PtrTraits> render_predicate;

	// per-slot bindings; the per-stage arrays are indexed [stage][slot]
	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> constant_buffers[D3D11_STAGES][D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT];
	refcnt_ptr<GalliumD3D11ShaderResourceView, PtrTraits> shader_resource_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
	refcnt_ptr<GalliumD3D11SamplerState, PtrTraits> samplers[D3D11_STAGES][D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> input_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
	refcnt_ptr<GalliumD3D11RenderTargetView, PtrTraits> render_target_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> so_targets[D3D11_SO_BUFFER_SLOT_COUNT];

#if API >= 11
	refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> cs_unordered_access_views[D3D11_PS_CS_UAV_REGISTER_COUNT];
	refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> om_unordered_access_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
#endif

	// plain pipeline state values; all of them are reset by init_context()
	D3D11_VIEWPORT viewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
	D3D11_RECT scissor_rects[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
	unsigned so_offsets[D3D11_SO_BUFFER_SLOT_COUNT];
	D3D11_PRIMITIVE_TOPOLOGY primitive_topology;
	DXGI_FORMAT index_format;
	unsigned index_offset;
	BOOL render_predicate_value;
	float blend_color[4];
	unsigned sample_mask;
	unsigned stencil_ref;
	bool depth_clamp;

	// state objects created by init_context() and deleted by destroy_context();
	// they are bound to the pipe whenever the application binds no object of that kind
	void* default_input_layout;
	void* default_rasterizer;
	void* default_depth_stencil;
	void* default_blend;
	void* default_sampler;
	void* ld_sampler;
	void * default_shaders[D3D11_STAGES];

	// derived state
	int primitive_mode; // pipe primitive looked up from primitive_topology
	struct pipe_vertex_buffer vertex_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
	struct pipe_resource* so_buffers[D3D11_SO_BUFFER_SLOT_COUNT];
	struct pipe_sampler_view* sampler_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
	struct
	{
		void* ld; // accessed with a -1 index from v
		void* v[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
	} sampler_csos[D3D11_STAGES];
	struct pipe_resource * buffers[D3D11_SO_BUFFER_SLOT_COUNT];
	// number of leading slots in use; trailing empty slots are trimmed lazily when state is flushed
	unsigned num_shader_resource_views[D3D11_STAGES];
	unsigned num_samplers[D3D11_STAGES];
	unsigned num_vertex_buffers;
	unsigned num_render_target_views;
	unsigned num_viewports;
	unsigned num_scissor_rects;
	unsigned num_so_targets;

	struct pipe_context* pipe;
	unsigned update_flags; // dirty mask, see the UPDATE_* definitions

	bool owns_pipe; // if set, destroy_context() destroys the pipe context
	unsigned context_flags;

	GalliumD3D11Caps caps;

	cso_context* cso_ctx;
	gen_mipmap_state* gen_mipmap;
117
#if API >= 11
// the D3D11 context takes no lock here: SYNCHRONIZED expands to an empty statement
#define SYNCHRONIZED do {} while(0)

	// Stores the pipe context and flags, copies the capabilities of the device
	// and brings the context to its initial state.
	GalliumD3D11DeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, bool owns_pipe, unsigned context_flags = 0)
	: GalliumD3D11DeviceChild<ID3D11DeviceContext>(device), pipe(pipe), owns_pipe(owns_pipe), context_flags(context_flags)
	{
		caps = device->screen_caps;
		init_context();
	}

	// Releases everything init_context() created (and the pipe context, if owned).
	~GalliumD3D11DeviceContext()
	{
		destroy_context();
	}
#else
// the D3D10 device holds a lock on this->mutex for the rest of the enclosing scope
#define SYNCHRONIZED lock_t<maybe_mutex_t<threadsafe> > lock_(this->mutex)

	// Same as the D3D11 constructor, except that the device is its own screen
	// and context_flags is always 0.
	GalliumD3D10Device(pipe_screen* screen, pipe_context* pipe, bool owns_pipe, unsigned creation_flags, IDXGIAdapter* adapter)
	: GalliumD3D10ScreenImpl<threadsafe>(screen, pipe, owns_pipe, creation_flags, adapter), pipe(pipe), owns_pipe(owns_pipe), context_flags(0)
	{
		caps = this->screen_caps;
		init_context();
	}

	// Releases everything init_context() created (and the pipe context, if owned).
	~GalliumD3D10Device()
	{
		destroy_context();
	}
#endif
147
148	void init_context()
149	{
150		if(!pipe->begin_query)
151			caps.queries = false;
152		if(!pipe->render_condition)
153			caps.render_condition = false;
154		if(!pipe->bind_gs_state)
155		{
156			caps.gs = false;
157			caps.stages = 2;
158		}
159		if(!pipe->set_stream_output_buffers)
160			caps.so = false;
161		if(!pipe->set_geometry_sampler_views)
162			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_GEOMETRY);
163		if(!pipe->set_fragment_sampler_views)
164			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_FRAGMENT);
165		if(!pipe->set_vertex_sampler_views)
166			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_VERTEX);
167
168		update_flags = 0;
169
170		// pipeline state
171		memset(viewports, 0, sizeof(viewports));
172		memset(scissor_rects, 0, sizeof(scissor_rects));
173		memset(so_offsets, 0, sizeof(so_offsets));
174		primitive_topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
175		index_format = DXGI_FORMAT_UNKNOWN;
176		index_offset = 0;
177		render_predicate_value = 0;
178		memset(blend_color, 0, sizeof(blend_color));
179		sample_mask = ~0;
180		stencil_ref = 0;
181		depth_clamp = 0;
182
183		// derived state
184		primitive_mode = 0;
185		memset(vertex_buffers, 0, sizeof(vertex_buffers));
186		memset(so_buffers, 0, sizeof(so_buffers));
187		memset(sampler_views, 0, sizeof(sampler_views));
188		memset(sampler_csos, 0, sizeof(sampler_csos));
189		memset(num_shader_resource_views, 0, sizeof(num_shader_resource_views));
190		memset(num_samplers, 0, sizeof(num_samplers));
191		num_vertex_buffers = 0;
192		num_render_target_views = 0;
193		num_viewports = 0;
194		num_scissor_rects = 0;
195		num_so_targets = 0;
196
197		default_input_layout = pipe->create_vertex_elements_state(pipe, 0, 0);
198
199		struct pipe_rasterizer_state rasterizerd;
200		memset(&rasterizerd, 0, sizeof(rasterizerd));
201		rasterizerd.gl_rasterization_rules = 1;
202		rasterizerd.cull_face = PIPE_FACE_BACK;
203		rasterizerd.line_width = 1.0f;
204		rasterizerd.point_size = 1.0f;
205		default_rasterizer = pipe->create_rasterizer_state(pipe, &rasterizerd);
206
207		struct pipe_depth_stencil_alpha_state depth_stencild;
208		memset(&depth_stencild, 0, sizeof(depth_stencild));
209		depth_stencild.depth.enabled = TRUE;
210		depth_stencild.depth.writemask = 1;
211		depth_stencild.depth.func = PIPE_FUNC_LESS;
212		default_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &depth_stencild);
213
214		struct pipe_blend_state blendd;
215		memset(&blendd, 0, sizeof(blendd));
216		blendd.rt[0].colormask = 0xf;
217		default_blend = pipe->create_blend_state(pipe, &blendd);
218
219		struct pipe_sampler_state samplerd;
220		memset(&samplerd, 0, sizeof(samplerd));
221		samplerd.normalized_coords = 1;
222		samplerd.min_img_filter = PIPE_TEX_FILTER_LINEAR;
223		samplerd.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
224		samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR;
225		samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
226		samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
227		samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
228		samplerd.border_color[0] = 1.0f;
229		samplerd.border_color[1] = 1.0f;
230		samplerd.border_color[2] = 1.0f;
231		samplerd.border_color[3] = 1.0f;
232		samplerd.min_lod = -FLT_MAX;
233		samplerd.max_lod = FLT_MAX;
234		samplerd.max_anisotropy = 1;
235		default_sampler = pipe->create_sampler_state(pipe, &samplerd);
236
237		memset(&samplerd, 0, sizeof(samplerd));
238		samplerd.normalized_coords = 0;
239		samplerd.min_img_filter = PIPE_TEX_FILTER_NEAREST;
240		samplerd.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
241		samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
242		samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
243		samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
244		samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
245		samplerd.min_lod = -FLT_MAX;
246		samplerd.max_lod = FLT_MAX;
247		samplerd.max_anisotropy = 1;
248		ld_sampler = pipe->create_sampler_state(pipe, &samplerd);
249
250		for(unsigned s = 0; s < D3D11_STAGES; ++s)
251		{
252			sampler_csos[s].ld = ld_sampler;
253			for(unsigned i = 0; i < D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; ++i)
254				sampler_csos[s].v[i] = default_sampler;
255		}
256
257		// TODO: should this really be empty shaders, or should they be all-passthrough?
258		memset(default_shaders, 0, sizeof(default_shaders));
259		struct ureg_program *ureg;
260		ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
261		ureg_END(ureg);
262		default_shaders[PIPE_SHADER_FRAGMENT] = ureg_create_shader_and_destroy(ureg, pipe);
263
264		ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
265		ureg_END(ureg);
266		default_shaders[PIPE_SHADER_VERTEX] = ureg_create_shader_and_destroy(ureg, pipe);
267
268		cso_ctx = cso_create_context(pipe);
269		gen_mipmap = util_create_gen_mipmap(pipe, cso_ctx);
270
271		RestoreGalliumState();
272	}
273
	/* Tears down what init_context() created.
	 *
	 * The mipmap generator and the CSO context go first. Each kind of default
	 * state object is then unbound from the pipe before it is deleted, and the
	 * pipe context itself is destroyed last, but only if this object owns it.
	 */
	void destroy_context()
	{
		util_destroy_gen_mipmap(gen_mipmap);
		cso_destroy_context(cso_ctx);

		pipe->bind_vertex_elements_state(pipe, 0);
		pipe->delete_vertex_elements_state(pipe, default_input_layout);

		pipe->bind_rasterizer_state(pipe, 0);
		pipe->delete_rasterizer_state(pipe, default_rasterizer);

		pipe->bind_depth_stencil_alpha_state(pipe, 0);
		pipe->delete_depth_stencil_alpha_state(pipe, default_depth_stencil);

		pipe->bind_blend_state(pipe, 0);
		pipe->delete_blend_state(pipe, default_blend);

		// unbind the samplers of every stage before deleting the two sampler objects;
		// the geometry entry point is optional
		pipe->bind_fragment_sampler_states(pipe, 0, 0);
		pipe->bind_vertex_sampler_states(pipe, 0, 0);
		if(pipe->bind_geometry_sampler_states)
			pipe->bind_geometry_sampler_states(pipe, 0, 0);
		pipe->delete_sampler_state(pipe, default_sampler);
		pipe->delete_sampler_state(pipe, ld_sampler);

		pipe->bind_fs_state(pipe, 0);
		pipe->delete_fs_state(pipe, default_shaders[PIPE_SHADER_FRAGMENT]);

		pipe->bind_vs_state(pipe, 0);
		pipe->delete_vs_state(pipe, default_shaders[PIPE_SHADER_VERTEX]);

		if(owns_pipe)
			pipe->destroy(pipe);
	}
307
	// Returns the context flags stored by the constructor (always 0 for the D3D10 device).
	virtual unsigned STDMETHODCALLTYPE GetContextFlags(void)
	{
		return context_flags;
	}
// Trailing parameters of XSSetShader/XSGetShader: D3D11 appends the class
// instance arguments, for lower API levels the macros expand to nothing.
#if API >= 11
#define SET_SHADER_EXTRA_ARGS , \
	ID3D11ClassInstance *const *ppClassInstances, \
	unsigned count
#define GET_SHADER_EXTRA_ARGS , \
		ID3D11ClassInstance **ppClassInstances, \
		unsigned *out_count
#else
#define SET_SHADER_EXTRA_ARGS
#define GET_SHADER_EXTRA_ARGS
#endif
323
324/* On Windows D3D11, SetConstantBuffers and SetShaderResources crash if passed a null pointer.
325 * Instead, you have to pass a pointer to nulls to unbind things.
326 * We do the same.
327 * TODO: is D3D10 the same?
328 */
329	template<unsigned s>
330	void xs_set_shader(GalliumD3D11Shader<>* shader)
331	{
332		if(shader != shaders[s].p)
333		{
334			shaders[s] = shader;
335			void* shader_cso = shader ? shader->object : default_shaders[s];
336			switch(s)
337			{
338			case PIPE_SHADER_VERTEX:
339				pipe->bind_vs_state(pipe, shader_cso);
340				break;
341			case PIPE_SHADER_FRAGMENT:
342				pipe->bind_fs_state(pipe, shader_cso);
343				break;
344			case PIPE_SHADER_GEOMETRY:
345				pipe->bind_gs_state(pipe, shader_cso);
346				break;
347			}
348			update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s)) | (1 << (UPDATE_VIEWS_SHIFT + s));
349		}
350	}
351
352	template<unsigned s>
353	void xs_set_constant_buffers(unsigned start, unsigned count, GalliumD3D11Buffer *const *constbufs)
354	{
355		for(unsigned i = 0; i < count; ++i)
356		{
357			if(constbufs[i] != constant_buffers[s][start + i].p)
358			{
359				constant_buffers[s][start + i] = constbufs[i];
360				if(s < caps.stages && start + i < caps.constant_buffers[s])
361					pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL);
362			}
363		}
364	}
365
366	template<unsigned s>
367	void xs_set_shader_resources(unsigned start, unsigned count, GalliumD3D11ShaderResourceView *const *srvs)
368	{
369		int last_different = -1;
370		for(unsigned i = 0; i < count; ++i)
371		{
372			if(shader_resource_views[s][start + i].p != srvs[i])
373			{
374				shader_resource_views[s][start + i] = srvs[i];
375				sampler_views[s][start + i] = srvs[i] ? srvs[i]->object : 0;
376				last_different = i;
377			}
378		}
379		if(last_different >= 0)
380		{
381			num_shader_resource_views[s] = std::max(num_shader_resource_views[s], start + last_different + 1);
382			update_flags |= 1 << (UPDATE_VIEWS_SHIFT + s);
383		}
384	}
385
386	template<unsigned s>
387	void xs_set_samplers(unsigned start, unsigned count, GalliumD3D11SamplerState *const *samps)
388	{
389		int last_different = -1;
390		for(unsigned i = 0; i < count; ++i)
391		{
392			if(samplers[s][start + i].p != samps[i])
393			{
394				samplers[s][start + i] = samps[i];
395				sampler_csos[s].v[start + i] = samps[i] ? samps[i]->object : default_sampler;
396				last_different = i;
397			}
398			if(last_different >= 0)
399			{
400				num_samplers[s] = std::max(num_samplers[s], start + last_different + 1);
401				update_flags |= 1 << (UPDATE_SAMPLERS_SHIFT + s);
402			}
403		}
404	}
405
/* Generates the eight COM entry points of one shader stage:
 * XS{Set,Get}Shader, XS{Set,Get}ConstantBuffers, XS{Set,Get}ShaderResources
 * and XS{Set,Get}Samplers.
 *
 *   XS    - method prefix, also selects the D3D11_STAGE_##XS stage index
 *   Stage - stage name as used in the ID3D11##Stage##Shader interface name
 *
 * The setters cast the interface pointers to the implementation types and
 * forward to the xs_set_* templates. The getters hand out the result of ref()
 * on the stored bindings. No range checking is done on start/count.
 */
#define IMPLEMENT_SHADER_STAGE(XS, Stage) \
	virtual void STDMETHODCALLTYPE XS##SetShader( \
		ID3D11##Stage##Shader *pShader \
		SET_SHADER_EXTRA_ARGS) \
	{ \
		SYNCHRONIZED; \
		xs_set_shader<D3D11_STAGE_##XS>((GalliumD3D11Shader<>*)pShader); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetShader(\
		ID3D11##Stage##Shader **ppShader \
		GET_SHADER_EXTRA_ARGS) \
	{ \
		SYNCHRONIZED; \
		*ppShader = (ID3D11##Stage##Shader*)shaders[D3D11_STAGE_##XS].ref(); \
	} \
	virtual void STDMETHODCALLTYPE XS##SetConstantBuffers(\
		unsigned start, \
		unsigned count, \
		ID3D11Buffer *const* constant_buffers) \
	{ \
		SYNCHRONIZED; \
		xs_set_constant_buffers<D3D11_STAGE_##XS>(start, count, (GalliumD3D11Buffer *const *)constant_buffers); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetConstantBuffers(\
		unsigned start, \
		unsigned count, \
		ID3D11Buffer **out_constant_buffers) \
	{ \
		SYNCHRONIZED; \
		for(unsigned i = 0; i < count; ++i) \
			out_constant_buffers[i] = constant_buffers[D3D11_STAGE_##XS][start + i].ref(); \
	} \
	virtual void STDMETHODCALLTYPE XS##SetShaderResources(\
		unsigned start, \
		unsigned count, \
		ID3D11ShaderResourceView *const *new_shader_resource_views) \
	{ \
		SYNCHRONIZED; \
		xs_set_shader_resources<D3D11_STAGE_##XS>(start, count, (GalliumD3D11ShaderResourceView *const *)new_shader_resource_views); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetShaderResources(\
		unsigned start, \
		unsigned count, \
		ID3D11ShaderResourceView **out_shader_resource_views) \
	{ \
		SYNCHRONIZED; \
		for(unsigned i = 0; i < count; ++i) \
			out_shader_resource_views[i] = shader_resource_views[D3D11_STAGE_##XS][start + i].ref(); \
	} \
	virtual void STDMETHODCALLTYPE XS##SetSamplers(\
		unsigned start, \
		unsigned count, \
		ID3D11SamplerState *const *new_samplers) \
	{ \
		SYNCHRONIZED; \
		xs_set_samplers<D3D11_STAGE_##XS>(start, count, (GalliumD3D11SamplerState *const *)new_samplers); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetSamplers( \
		unsigned start, \
		unsigned count, \
		ID3D11SamplerState **out_samplers) \
	{ \
		SYNCHRONIZED; \
		for(unsigned i = 0; i < count; ++i) \
			out_samplers[i] = samplers[D3D11_STAGE_##XS][start + i].ref(); \
	}
472
// Per-stage statement wrappers: VS and PS always run the statement, GS only
// when caps.gs is set, and HS/DS/CS drop it entirely.
// NOTE(review): no use of the DO_* macros is visible in this part of the file.
#define DO_VS(x) x
#define DO_GS(x) do {if(caps.gs) {x;}} while(0)
#define DO_PS(x) x
#define DO_HS(x)
#define DO_DS(x)
#define DO_CS(x)
	// entry points of the stages every API level has
	IMPLEMENT_SHADER_STAGE(VS, Vertex)
	IMPLEMENT_SHADER_STAGE(GS, Geometry)
	IMPLEMENT_SHADER_STAGE(PS, Pixel)

#if API >= 11
	// entry points of the stages added by D3D11
	IMPLEMENT_SHADER_STAGE(HS, Hull)
	IMPLEMENT_SHADER_STAGE(DS, Domain)
	IMPLEMENT_SHADER_STAGE(CS, Compute)
487
488	virtual void STDMETHODCALLTYPE CSSetUnorderedAccessViews(
489		unsigned start,
490		unsigned count,
491		ID3D11UnorderedAccessView *const *new_unordered_access_views,
492		const unsigned *new_uav_initial_counts)
493	{
494		SYNCHRONIZED;
495		for(unsigned i = 0; i < count; ++i)
496			cs_unordered_access_views[start + i] = new_unordered_access_views[i];
497	}
498
499	virtual void STDMETHODCALLTYPE CSGetUnorderedAccessViews(
500		unsigned start,
501		unsigned count,
502		ID3D11UnorderedAccessView **out_unordered_access_views)
503	{
504		SYNCHRONIZED;
505		for(unsigned i = 0; i < count; ++i)
506			out_unordered_access_views[i] = cs_unordered_access_views[start + i].ref();
507	}
508#endif
509
510	template<unsigned s>
511	void update_stage()
512	{
513		if(update_flags & (1 << (UPDATE_VIEWS_SHIFT + s)))
514		{
515			while(num_shader_resource_views[s] && !sampler_views[s][num_shader_resource_views[s] - 1]) \
516				--num_shader_resource_views[s];
517			if((1 << s) & caps.stages_with_sampling)
518			{
519				struct pipe_sampler_view* views_to_bind[PIPE_MAX_SAMPLERS];
520				unsigned num_views_to_bind = shaders[s] ? shaders[s]->slot_to_resource.size() : 0;
521				for(unsigned i = 0; i < num_views_to_bind; ++i)
522				{
523					views_to_bind[i] = sampler_views[s][shaders[s]->slot_to_resource[i]];
524				}
525				switch(s)
526				{
527				case PIPE_SHADER_VERTEX:
528					pipe->set_vertex_sampler_views(pipe, num_views_to_bind, views_to_bind);
529					break;
530				case PIPE_SHADER_FRAGMENT:
531					pipe->set_fragment_sampler_views(pipe, num_views_to_bind, views_to_bind);
532					break;
533				case PIPE_SHADER_GEOMETRY:
534					pipe->set_geometry_sampler_views(pipe, num_views_to_bind, views_to_bind);
535					break;
536				}
537			}
538		}
539
540		if(update_flags & (1 << (UPDATE_SAMPLERS_SHIFT + s)))
541		{
542			while(num_samplers[s] && !sampler_csos[s].v[num_samplers[s] - 1])
543				--num_samplers[s];
544			if((1 << s) & caps.stages_with_sampling)
545			{
546				void* samplers_to_bind[PIPE_MAX_SAMPLERS];
547				unsigned num_samplers_to_bind = shaders[s] ? shaders[s]->slot_to_sampler.size() : 0;
548				for(unsigned i = 0; i < num_samplers_to_bind; ++i)
549				{
550					// index can be -1 to access sampler_csos[s].ld
551					samplers_to_bind[i] = *(sampler_csos[s].v + shaders[s]->slot_to_sampler[i]);
552				}
553				switch(s)
554				{
555				case PIPE_SHADER_VERTEX:
556					pipe->bind_vertex_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
557					break;
558				case PIPE_SHADER_FRAGMENT:
559					pipe->bind_fragment_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
560					break;
561				case PIPE_SHADER_GEOMETRY:
562					pipe->bind_geometry_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
563					break;
564				}
565			}
566		}
567	}
568
569	void update_state()
570	{
571		update_stage<D3D11_STAGE_PS>();
572		update_stage<D3D11_STAGE_VS>();
573		update_stage<D3D11_STAGE_GS>();
574#if API >= 11
575		update_stage<D3D11_STAGE_HS>();
576		update_stage<D3D11_STAGE_DS>();
577		update_stage<D3D11_STAGE_CS>();
578#endif
579
580		if(update_flags & UPDATE_VERTEX_BUFFERS)
581		{
582			while(num_vertex_buffers && !vertex_buffers[num_vertex_buffers - 1].buffer)
583				--num_vertex_buffers;
584			pipe->set_vertex_buffers(pipe, num_vertex_buffers, vertex_buffers);
585		}
586
587		update_flags = 0;
588	}
589
590	virtual void STDMETHODCALLTYPE IASetInputLayout(
591		ID3D11InputLayout *new_input_layout)
592	{
593		SYNCHRONIZED;
594		if(new_input_layout != input_layout.p)
595		{
596			input_layout = new_input_layout;
597			pipe->bind_vertex_elements_state(pipe, new_input_layout ? ((GalliumD3D11InputLayout*)new_input_layout)->object : default_input_layout);
598		}
599	}
600
	// Stores the result of ref() on the current input layout in *out_input_layout.
	virtual void STDMETHODCALLTYPE IAGetInputLayout(
		ID3D11InputLayout **out_input_layout)
	{
		SYNCHRONIZED;
		*out_input_layout = input_layout.ref();
	}
607
608	virtual void STDMETHODCALLTYPE IASetVertexBuffers(
609		unsigned start,
610		unsigned count,
611		ID3D11Buffer *const *new_vertex_buffers,
612		const unsigned *new_strides,
613		const unsigned *new_offsets)
614	{
615		SYNCHRONIZED;
616		int last_different = -1;
617		for(unsigned i = 0; i < count; ++i)
618		{
619			ID3D11Buffer* buffer = new_vertex_buffers[i];
620			if(buffer != input_buffers[start + i].p
621				|| vertex_buffers[start + i].buffer_offset != new_offsets[i]
622				|| vertex_buffers[start + i].stride != new_offsets[i]
623			)
624			{
625				input_buffers[start + i] = buffer;
626				vertex_buffers[start + i].buffer = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
627				vertex_buffers[start + i].buffer_offset = new_offsets[i];
628				vertex_buffers[start + i].stride = new_strides[i];
629				last_different = i;
630			}
631		}
632		if(last_different >= 0)
633		{
634			num_vertex_buffers = std::max(num_vertex_buffers, start + count);
635			update_flags |= UPDATE_VERTEX_BUFFERS;
636		}
637	}
638
639	virtual void STDMETHODCALLTYPE IAGetVertexBuffers(
640		unsigned start,
641		unsigned count,
642		ID3D11Buffer **out_vertex_buffers,
643		unsigned *out_strides,
644		unsigned *out_offsets)
645	{
646		SYNCHRONIZED;
647		if(out_vertex_buffers)
648		{
649			for(unsigned i = 0; i < count; ++i)
650				out_vertex_buffers[i] = input_buffers[start + i].ref();
651		}
652
653		if(out_offsets)
654		{
655			for(unsigned i = 0; i < count; ++i)
656				out_offsets[i] = vertex_buffers[start + i].buffer_offset;
657		}
658
659		if(out_strides)
660		{
661			for(unsigned i = 0; i < count; ++i)
662				out_strides[i] = vertex_buffers[start + i].stride;
663		}
664	}
665
666	void set_index_buffer()
667	{
668		pipe_index_buffer ib;
669		if(!index_buffer)
670		{
671			memset(&ib, 0, sizeof(ib));
672		}
673		else
674		{
675			if(index_format == DXGI_FORMAT_R32_UINT)
676				ib.index_size = 4;
677			else if(index_format == DXGI_FORMAT_R16_UINT)
678				ib.index_size = 2;
679			else
680				ib.index_size = 1;
681			ib.offset = index_offset;
682			ib.buffer = index_buffer ? ((GalliumD3D11Buffer*)index_buffer.p)->resource : 0;
683		}
684		pipe->set_index_buffer(pipe, &ib);
685	}
686
687	virtual void STDMETHODCALLTYPE IASetIndexBuffer(
688		ID3D11Buffer *new_index_buffer,
689		DXGI_FORMAT new_index_format,
690		unsigned new_index_offset)
691	{
692		SYNCHRONIZED;
693		if(index_buffer.p != new_index_buffer || index_format != new_index_format || index_offset != new_index_offset)
694		{
695			index_buffer = new_index_buffer;
696			index_format = new_index_format;
697			index_offset = new_index_offset;
698
699			set_index_buffer();
700		}
701	}
702
703	virtual void STDMETHODCALLTYPE IAGetIndexBuffer(
704		ID3D11Buffer **out_index_buffer,
705		DXGI_FORMAT *out_index_format,
706		unsigned *out_index_offset)
707	{
708		SYNCHRONIZED;
709		if(out_index_buffer)
710			*out_index_buffer = index_buffer.ref();
711		if(out_index_format)
712			*out_index_format = index_format;
713		if(out_index_offset)
714			*out_index_offset = index_offset;
715	}
716
717	virtual void STDMETHODCALLTYPE IASetPrimitiveTopology(
718		D3D11_PRIMITIVE_TOPOLOGY new_primitive_topology)
719	{
720		SYNCHRONIZED;
721		if(primitive_topology != new_primitive_topology)
722		{
723			if(new_primitive_topology < D3D_PRIMITIVE_TOPOLOGY_COUNT)
724				primitive_mode = d3d_to_pipe_prim[new_primitive_topology];
725			else
726				primitive_mode = 0;
727			primitive_topology = new_primitive_topology;
728		}
729	}
730
	// Stores the current primitive topology in *out_primitive_topology.
	virtual void STDMETHODCALLTYPE IAGetPrimitiveTopology(
		D3D11_PRIMITIVE_TOPOLOGY *out_primitive_topology)
	{
		SYNCHRONIZED;
		*out_primitive_topology = primitive_topology;
	}
737
738	virtual void STDMETHODCALLTYPE DrawIndexed(
739		unsigned index_count,
740		unsigned start_index_location,
741		int base_vertex_location)
742	{
743		SYNCHRONIZED;
744		if(update_flags)
745			update_state();
746
747		pipe_draw_info info;
748		info.mode = primitive_mode;
749		info.indexed = TRUE;
750		info.count = index_count;
751		info.start = start_index_location;
752		info.index_bias = base_vertex_location;
753		info.min_index = 0;
754		info.max_index = ~0;
755		info.start_instance = 0;
756		info.instance_count = 1;
757
758		pipe->draw_vbo(pipe, &info);
759	}
760
761	virtual void STDMETHODCALLTYPE Draw(
762		unsigned vertex_count,
763		unsigned start_vertex_location)
764	{
765		SYNCHRONIZED;
766		if(update_flags)
767			update_state();
768
769		pipe_draw_info info;
770		info.mode = primitive_mode;
771		info.indexed = FALSE;
772		info.count = vertex_count;
773		info.start = start_vertex_location;
774		info.index_bias = 0;
775		info.min_index = 0;
776		info.max_index = ~0;
777		info.start_instance = 0;
778		info.instance_count = 1;
779
780		pipe->draw_vbo(pipe, &info);
781	}
782
783	virtual void STDMETHODCALLTYPE DrawIndexedInstanced(
784		unsigned index_countPerInstance,
785		unsigned instance_count,
786		unsigned start_index_location,
787		int base_vertex_location,
788		unsigned start_instance_location)
789	{
790		SYNCHRONIZED;
791		if(update_flags)
792			update_state();
793
794		pipe_draw_info info;
795		info.mode = primitive_mode;
796		info.indexed = TRUE;
797		info.count = index_countPerInstance;
798		info.start = start_index_location;
799		info.index_bias = base_vertex_location;
800		info.min_index = 0;
801		info.max_index = ~0;
802		info.start_instance = start_instance_location;
803		info.instance_count = instance_count;
804
805		pipe->draw_vbo(pipe, &info);
806	}
807
808	virtual void STDMETHODCALLTYPE DrawInstanced(
809		unsigned vertex_countPerInstance,
810		unsigned instance_count,
811		unsigned start_vertex_location,
812		unsigned start_instance_location)
813	{
814		SYNCHRONIZED;
815		if(update_flags)
816			update_state();
817
818		pipe_draw_info info;
819		info.mode = primitive_mode;
820		info.indexed = FALSE;
821		info.count = vertex_countPerInstance;
822		info.start = start_vertex_location;
823		info.index_bias = 0;
824		info.min_index = 0;
825		info.max_index = ~0;
826		info.start_instance = start_instance_location;
827		info.instance_count = instance_count;
828
829		pipe->draw_vbo(pipe, &info);
830	}
831
832	virtual void STDMETHODCALLTYPE DrawAuto(void)
833	{
834		if(!caps.so)
835			return;
836
837		SYNCHRONIZED;
838		if(update_flags)
839			update_state();
840
841		pipe->draw_stream_output(pipe, primitive_mode);
842	}
843
844	virtual void STDMETHODCALLTYPE DrawIndexedInstancedIndirect(
845		ID3D11Buffer *buffer,
846		unsigned aligned_byte_offset)
847	{
848		SYNCHRONIZED;
849		if(update_flags)
850			update_state();
851
852		struct {
853			unsigned count;
854			unsigned instance_count;
855			unsigned start;
856			unsigned index_bias;
857		} data;
858
859		pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data);
860
861		pipe_draw_info info;
862		info.mode = primitive_mode;
863		info.indexed = TRUE;
864		info.start = data.start;
865		info.count = data.count;
866		info.index_bias = data.index_bias;
867		info.min_index = 0;
868		info.max_index = ~0;
869		info.start_instance = 0;
870		info.instance_count = data.instance_count;
871
872		pipe->draw_vbo(pipe, &info);
873	}
874
875	virtual void STDMETHODCALLTYPE DrawInstancedIndirect(
876		ID3D11Buffer *buffer,
877		unsigned aligned_byte_offset)
878	{
879		SYNCHRONIZED;
880		if(update_flags)
881			update_state();
882
883		struct {
884			unsigned count;
885			unsigned instance_count;
886			unsigned start;
887		} data;
888
889		pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data);
890
891		pipe_draw_info info;
892		info.mode = primitive_mode;
893		info.indexed = FALSE;
894		info.start = data.start;
895		info.count = data.count;
896		info.index_bias = 0;
897		info.min_index = 0;
898		info.max_index = ~0;
899		info.start_instance = 0;
900		info.instance_count = data.instance_count;
901
902		pipe->draw_vbo(pipe, &info);
903	}
904
905#if API >= 11
	// Compute dispatch: not implemented, the call is accepted and ignored.
	virtual void STDMETHODCALLTYPE Dispatch(
		unsigned thread_group_count_x,
		unsigned thread_group_count_y,
		unsigned thread_group_count_z)
	{
// uncomment this when this is implemented
//		SYNCHRONIZED;
//		if(update_flags)
//			update_state();
	}
916
	// Indirect compute dispatch: not implemented, the call is accepted and ignored.
	virtual void STDMETHODCALLTYPE DispatchIndirect(
		ID3D11Buffer *buffer,
		unsigned aligned_byte_offset)
	{
// uncomment this when this is implemented
//		SYNCHRONIZED;
//		if(update_flags)
//			update_state();
	}
926#endif
927
928	void set_clip()
929	{
930		pipe_clip_state clip;
931		clip.nr = 0;
932		clip.depth_clamp = depth_clamp;
933		pipe->set_clip_state(pipe, &clip);
934	}
935
936	virtual void STDMETHODCALLTYPE RSSetState(
937		ID3D11RasterizerState *new_rasterizer_state)
938	{
939		SYNCHRONIZED;
940		if(new_rasterizer_state != rasterizer_state.p)
941		{
942			rasterizer_state = new_rasterizer_state;
943			pipe->bind_rasterizer_state(pipe, new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->object : default_rasterizer);
944			bool new_depth_clamp = new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->depth_clamp : false;
945			if(depth_clamp != new_depth_clamp)
946			{
947				depth_clamp = new_depth_clamp;
948				set_clip();
949			}
950		}
951	}
952
	// Stores the result of ref() on the current rasterizer state in *out_rasterizer_state.
	virtual void STDMETHODCALLTYPE RSGetState(
		ID3D11RasterizerState **out_rasterizer_state)
	{
		SYNCHRONIZED;
		*out_rasterizer_state = rasterizer_state.ref();
	}
959
960	void set_viewport()
961	{
962		// TODO: is depth correct? it seems D3D10/11 uses a [-1,1]x[-1,1]x[0,1] cube
963		pipe_viewport_state viewport;
964		float half_width = viewports[0].Width * 0.5f;
965		float half_height = viewports[0].Height * 0.5f;
966
967		viewport.scale[0] = half_width;
968		viewport.scale[1] = -half_height;
969		viewport.scale[2] = (viewports[0].MaxDepth - viewports[0].MinDepth);
970		viewport.scale[3] = 1.0f;
971		viewport.translate[0] = half_width + viewports[0].TopLeftX;
972		viewport.translate[1] = half_height + viewports[0].TopLeftY;
973		viewport.translate[2] = viewports[0].MinDepth;
974		viewport.translate[3] = 1.0f;
975		pipe->set_viewport_state(pipe, &viewport);
976	}
977
978	virtual void STDMETHODCALLTYPE RSSetViewports(
979		unsigned count,
980		const D3D11_VIEWPORT *new_viewports)
981	{
982		SYNCHRONIZED;
983		if(count)
984		{
985			if(memcmp(&viewports[0], &new_viewports[0], sizeof(viewports[0])))
986			{
987				viewports[0] = new_viewports[0];
988				set_viewport();
989			}
990			for(unsigned i = 1; i < count; ++i)
991				viewports[i] = new_viewports[i];
992		}
993		else if(num_viewports)
994		{
995			// TODO: what should we do here?
996			memset(&viewports[0], 0, sizeof(viewports[0]));
997			set_viewport();
998		}
999		num_viewports = count;
1000	}
1001
1002	virtual void STDMETHODCALLTYPE RSGetViewports(
1003		unsigned *out_count,
1004		D3D11_VIEWPORT *out_viewports)
1005	{
1006		SYNCHRONIZED;
1007		if(out_viewports)
1008		{
1009			unsigned i;
1010			for(i = 0; i < std::min(*out_count, num_viewports); ++i)
1011				out_viewports[i] = viewports[i];
1012
1013			memset(out_viewports + i, 0, (*out_count - i) * sizeof(D3D11_VIEWPORT));
1014		}
1015
1016		*out_count = num_viewports;
1017	}
1018
1019	void set_scissor()
1020	{
1021		pipe_scissor_state scissor;
1022		scissor.minx = scissor_rects[0].left;
1023		scissor.miny = scissor_rects[0].top;
1024		scissor.maxx = scissor_rects[0].right;
1025		scissor.maxy = scissor_rects[0].bottom;
1026		pipe->set_scissor_state(pipe, &scissor);
1027	}
1028
1029	virtual void STDMETHODCALLTYPE RSSetScissorRects(
1030		unsigned count,
1031		const D3D11_RECT *new_rects)
1032	{
1033		SYNCHRONIZED;
1034		if(count)
1035		{
1036			if(memcmp(&scissor_rects[0], &new_rects[0], sizeof(scissor_rects[0])))
1037			{
1038				scissor_rects[0] = new_rects[0];
1039				set_scissor();
1040			}
1041			for(unsigned i = 1; i < count; ++i)
1042				scissor_rects[i] = new_rects[i];
1043		}
1044		else if(num_scissor_rects)
1045		{
1046			// TODO: what should we do here?
1047			memset(&scissor_rects[0], 0, sizeof(scissor_rects[0]));
1048			set_scissor();
1049		}
1050
1051		num_scissor_rects = count;
1052	}
1053
1054	virtual void STDMETHODCALLTYPE RSGetScissorRects(
1055		unsigned *out_count,
1056		D3D11_RECT *out_rects)
1057	{
1058		SYNCHRONIZED;
1059		if(out_rects)
1060		{
1061			unsigned i;
1062			for(i = 0; i < std::min(*out_count, num_scissor_rects); ++i)
1063				out_rects[i] = scissor_rects[i];
1064
1065			memset(out_rects + i, 0, (*out_count - i) * sizeof(D3D11_RECT));
1066		}
1067
1068		*out_count = num_scissor_rects;
1069	}
1070
1071	virtual void STDMETHODCALLTYPE OMSetBlendState(
1072		ID3D11BlendState *new_blend_state,
1073		const float new_blend_factor[4],
1074		unsigned new_sample_mask)
1075	{
1076		SYNCHRONIZED;
1077		float white[4] = {1.0f, 1.0f, 1.0f, 1.0f};
1078
1079		if(blend_state.p != new_blend_state)
1080		{
1081			pipe->bind_blend_state(pipe, new_blend_state ? ((GalliumD3D11BlendState*)new_blend_state)->object : default_blend);
1082			blend_state = new_blend_state;
1083		}
1084
1085		// Windows D3D11 does this, even though it's apparently undocumented
1086		if(!new_blend_factor)
1087			new_blend_factor = white;
1088
1089		if(memcmp(blend_color, new_blend_factor, sizeof(blend_color)))
1090		{
1091			pipe->set_blend_color(pipe, (struct pipe_blend_color*)new_blend_factor);
1092			memcpy(blend_color, new_blend_factor, sizeof(blend_color));
1093		}
1094
1095		if(sample_mask != new_sample_mask)
1096		{
1097			pipe->set_sample_mask(pipe, new_sample_mask);
1098			sample_mask = new_sample_mask;
1099		}
1100	}
1101
1102	virtual void STDMETHODCALLTYPE OMGetBlendState(
1103		ID3D11BlendState **out_blend_state,
1104		float out_blend_factor[4],
1105		unsigned *out_sample_mask)
1106	{
1107		SYNCHRONIZED;
1108		if(out_blend_state)
1109			*out_blend_state = blend_state.ref();
1110		if(out_blend_factor)
1111			memcpy(out_blend_factor, blend_color, sizeof(blend_color));
1112		if(out_sample_mask)
1113			*out_sample_mask = sample_mask;
1114	}
1115
1116	void set_stencil_ref()
1117	{
1118		struct pipe_stencil_ref sref;
1119		sref.ref_value[0] = stencil_ref;
1120		sref.ref_value[1] = stencil_ref;
1121		pipe->set_stencil_ref(pipe, &sref);
1122	}
1123
1124	virtual void STDMETHODCALLTYPE OMSetDepthStencilState(
1125		ID3D11DepthStencilState *new_depth_stencil_state,
1126		unsigned new_stencil_ref)
1127	{
1128		SYNCHRONIZED;
1129		if(new_depth_stencil_state != depth_stencil_state.p)
1130		{
1131			pipe->bind_depth_stencil_alpha_state(pipe, new_depth_stencil_state ? ((GalliumD3D11DepthStencilState*)new_depth_stencil_state)->object : default_depth_stencil);
1132			depth_stencil_state = new_depth_stencil_state;
1133		}
1134
1135		if(new_stencil_ref != stencil_ref)
1136		{
1137			stencil_ref = new_stencil_ref;
1138			set_stencil_ref();
1139		}
1140	}
1141
1142	virtual void STDMETHODCALLTYPE OMGetDepthStencilState(
1143		ID3D11DepthStencilState **out_depth_stencil_state,
1144		unsigned *out_stencil_ref)
1145	{
1146		SYNCHRONIZED;
1147		if(*out_depth_stencil_state)
1148			*out_depth_stencil_state = depth_stencil_state.ref();
1149		if(out_stencil_ref)
1150			*out_stencil_ref = stencil_ref;
1151	}
1152
1153	void set_framebuffer()
1154	{
1155		struct pipe_framebuffer_state fb;
1156		memset(&fb, 0, sizeof(fb));
1157		if(depth_stencil_view)
1158		{
1159			struct pipe_surface* surf = ((GalliumD3D11DepthStencilView*)depth_stencil_view.p)->object;
1160			fb.zsbuf = surf;
1161			if(surf->width > fb.width)
1162				fb.width = surf->width;
1163			if(surf->height > fb.height)
1164				fb.height = surf->height;
1165		}
1166		fb.nr_cbufs = num_render_target_views;
1167		unsigned i;
1168		for(i = 0; i < num_render_target_views; ++i)
1169		{
1170			if(render_target_views[i])
1171			{
1172				struct pipe_surface* surf = ((GalliumD3D11RenderTargetView*)render_target_views[i].p)->object;
1173				fb.cbufs[i] = surf;
1174				if(surf->width > fb.width)
1175					fb.width = surf->width;
1176				if(surf->height > fb.height)
1177					fb.height = surf->height;
1178			}
1179		}
1180
1181		pipe->set_framebuffer_state(pipe, &fb);
1182	}
1183
1184	/* TODO: the docs say that we should unbind conflicting resources (e.g. those bound for read while we are binding them for write too), but we aren't.
1185	 * Hopefully nobody relies on this happening
1186	 */
1187
1188	virtual void STDMETHODCALLTYPE OMSetRenderTargets(
1189		unsigned count,
1190		ID3D11RenderTargetView *const *new_render_target_views,
1191		ID3D11DepthStencilView  *new_depth_stencil_view)
1192	{
1193		SYNCHRONIZED;
1194		if(!new_render_target_views)
1195			count = 0;
1196		if(count == num_render_target_views)
1197		{
1198			for(unsigned i = 0; i < count; ++i)
1199			{
1200				if(new_render_target_views[i] != render_target_views[i].p)
1201					goto changed;
1202			}
1203			return;
1204		}
1205changed:
1206		depth_stencil_view = new_depth_stencil_view;
1207		unsigned i;
1208		for(i = 0; i < count; ++i)
1209		{
1210			render_target_views[i] = new_render_target_views[i];
1211#if API >= 11
1212			om_unordered_access_views[i] = (ID3D11UnorderedAccessView*)NULL;
1213#endif
1214		}
1215		for(; i < num_render_target_views; ++i)
1216			render_target_views[i] = (ID3D11RenderTargetView*)NULL;
1217		num_render_target_views = count;
1218		set_framebuffer();
1219	}
1220
1221	virtual void STDMETHODCALLTYPE OMGetRenderTargets(
1222		unsigned count,
1223		ID3D11RenderTargetView **out_render_target_views,
1224		ID3D11DepthStencilView  **out_depth_stencil_view)
1225	{
1226		SYNCHRONIZED;
1227		if(out_render_target_views)
1228		{
1229			unsigned i;
1230			for(i = 0; i < std::min(num_render_target_views, count); ++i)
1231				out_render_target_views[i] = render_target_views[i].ref();
1232
1233			for(; i < count; ++i)
1234				out_render_target_views[i] = 0;
1235		}
1236
1237		if(out_depth_stencil_view)
1238			*out_depth_stencil_view = depth_stencil_view.ref();
1239	}
1240
1241#if API >= 11
1242	/* TODO: what is this supposed to do _exactly_? are we doing the right thing? */
1243	virtual void STDMETHODCALLTYPE OMSetRenderTargetsAndUnorderedAccessViews(
1244		unsigned rtv_count,
1245		ID3D11RenderTargetView *const *new_render_target_views,
1246		ID3D11DepthStencilView  *new_depth_stencil_view,
1247		unsigned uav_start,
1248		unsigned uav_count,
1249		ID3D11UnorderedAccessView *const *new_unordered_access_views,
1250		const unsigned *new_uav_initial_counts)
1251	{
1252		SYNCHRONIZED;
1253		if(rtv_count != D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL)
1254			OMSetRenderTargets(rtv_count, new_render_target_views, new_depth_stencil_view);
1255
1256		if(uav_count != D3D11_KEEP_UNORDERED_ACCESS_VIEWS)
1257		{
1258			for(unsigned i = 0; i < uav_count; ++i)
1259			{
1260				om_unordered_access_views[uav_start + i] = new_unordered_access_views[i];
1261				render_target_views[uav_start + i] = (ID3D11RenderTargetView*)0;
1262			}
1263		}
1264	}
1265
1266	virtual void STDMETHODCALLTYPE OMGetRenderTargetsAndUnorderedAccessViews(
1267		unsigned rtv_count,
1268		ID3D11RenderTargetView **out_render_target_views,
1269		ID3D11DepthStencilView  **out_depth_stencil_view,
1270		unsigned uav_start,
1271		unsigned uav_count,
1272		ID3D11UnorderedAccessView **out_unordered_access_views)
1273	{
1274		SYNCHRONIZED;
1275		if(out_render_target_views)
1276			OMGetRenderTargets(rtv_count, out_render_target_views, out_depth_stencil_view);
1277
1278		if(out_unordered_access_views)
1279		{
1280			for(unsigned i = 0; i < uav_count; ++i)
1281				out_unordered_access_views[i] = om_unordered_access_views[uav_start + i].ref();
1282		}
1283	}
1284#endif
1285
1286	virtual void STDMETHODCALLTYPE SOSetTargets(
1287		unsigned count,
1288		ID3D11Buffer *const *new_so_targets,
1289		const unsigned *new_offsets)
1290	{
1291		SYNCHRONIZED;
1292		unsigned i;
1293		if(!new_so_targets)
1294			count = 0;
1295		bool changed = false;
1296		for(i = 0; i < count; ++i)
1297		{
1298			ID3D11Buffer* buffer = new_so_targets[i];
1299			if(buffer != so_targets[i].p || new_offsets[i] != so_offsets[i])
1300			{
1301				so_buffers[i] = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
1302				so_targets[i] = buffer;
1303				so_offsets[i] = new_offsets[i];
1304				changed = true;
1305			}
1306		}
1307		for(; i < D3D11_SO_BUFFER_SLOT_COUNT; ++i)
1308		{
1309			if(so_targets[i].p || so_offsets[i])
1310			{
1311				changed = true;
1312				so_targets[i] = (ID3D11Buffer*)0;
1313				so_offsets[i] = 0;
1314			}
1315		}
1316		num_so_targets = count;
1317
1318		if(changed && caps.so)
1319			pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1320	}
1321
1322	virtual void STDMETHODCALLTYPE SOGetTargets(
1323		unsigned count,
1324		ID3D11Buffer **out_so_targets
1325#if API < 11
1326		, UINT *out_offsets
1327#endif
1328		)
1329	{
1330		SYNCHRONIZED;
1331		for(unsigned i = 0; i < count; ++i)
1332		{
1333			out_so_targets[i] = so_targets[i].ref();
1334#if API < 11
1335			out_offsets[i] = so_offsets[i];
1336#endif
1337		}
1338	}
1339
1340	virtual void STDMETHODCALLTYPE Begin(
1341		ID3D11Asynchronous *async)
1342	{
1343		SYNCHRONIZED;
1344		if(caps.queries)
1345			pipe->begin_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query);
1346	}
1347
1348	virtual void STDMETHODCALLTYPE End(
1349		ID3D11Asynchronous *async)
1350	{
1351		SYNCHRONIZED;
1352		if(caps.queries)
1353			pipe->end_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query);
1354	}
1355
1356	virtual HRESULT STDMETHODCALLTYPE GetData(
1357		ID3D11Asynchronous *iasync,
1358		void *out_data,
1359		unsigned data_size,
1360		unsigned get_data_flags)
1361	{
1362		SYNCHRONIZED;
1363		if(!caps.queries)
1364			return E_NOTIMPL;
1365
1366		GalliumD3D11Asynchronous<>* async = (GalliumD3D11Asynchronous<>*)iasync;
1367		void* tmp_data = alloca(async->data_size);
1368		boolean ret = pipe->get_query_result(pipe, async->query, !(get_data_flags & D3D11_ASYNC_GETDATA_DONOTFLUSH), tmp_data);
1369		if(out_data)
1370			memcpy(out_data, tmp_data, std::min(async->data_size, data_size));
1371		return ret ? S_OK : S_FALSE;
1372	}
1373
1374	void set_render_condition()
1375	{
1376		if(caps.render_condition)
1377		{
1378			if(!render_predicate)
1379				pipe->render_condition(pipe, 0, 0);
1380			else
1381			{
1382				GalliumD3D11Predicate* predicate = (GalliumD3D11Predicate*)render_predicate.p;
1383				if(!render_predicate_value && predicate->desc.Query == D3D11_QUERY_OCCLUSION_PREDICATE)
1384				{
1385					unsigned mode = (predicate->desc.MiscFlags & D3D11_QUERY_MISC_PREDICATEHINT) ? PIPE_RENDER_COND_NO_WAIT : PIPE_RENDER_COND_WAIT;
1386					pipe->render_condition(pipe, predicate->query, mode);
1387				}
1388				else
1389				{
1390					/* TODO: add inverted predication to Gallium*/
1391					pipe->render_condition(pipe, 0, 0);
1392				}
1393			}
1394		}
1395	}
1396
1397	virtual void STDMETHODCALLTYPE SetPredication(
1398		ID3D11Predicate *new_predicate,
1399		BOOL new_predicate_value)
1400	{
1401		SYNCHRONIZED;
1402		if(render_predicate.p != new_predicate || render_predicate_value != new_predicate_value)
1403		{
1404			render_predicate = new_predicate;
1405			render_predicate_value = new_predicate_value;
1406			set_render_condition();
1407		}
1408	}
1409
1410	virtual void STDMETHODCALLTYPE GetPredication(
1411		ID3D11Predicate **out_predicate,
1412		BOOL *out_predicate_value)
1413	{
1414		SYNCHRONIZED;
1415		if(out_predicate)
1416			*out_predicate = render_predicate.ref();
1417		if(out_predicate_value)
1418			*out_predicate_value = render_predicate_value;
1419	}
1420
1421	static unsigned d3d11_subresource_to_level(struct pipe_resource* resource, unsigned subresource)
1422	{
1423		if(subresource <= resource->last_level)
1424		{
1425			return subresource;
1426		}
1427		else
1428		{
1429			unsigned levels = resource->last_level + 1;
1430			return subresource % levels;
1431		}
1432	}
1433
1434	static unsigned d3d11_subresource_to_face(struct pipe_resource* resource, unsigned subresource)
1435	{
1436		if(subresource <= resource->last_level)
1437		{
1438			return 0;
1439		}
1440		else
1441		{
1442			unsigned levels = resource->last_level + 1;
1443			return subresource / levels;
1444		}
1445	}
1446
1447
1448	/* TODO: deferred contexts will need a different implementation of this,
1449	 * because we can't put the transfer info into the resource itself.
1450	 * Also, there are very different restrictions, for obvious reasons.
1451	 */
1452	virtual HRESULT STDMETHODCALLTYPE Map(
1453		ID3D11Resource *iresource,
1454		unsigned subresource,
1455		D3D11_MAP map_type,
1456		unsigned map_flags,
1457		D3D11_MAPPED_SUBRESOURCE *mapped_resource)
1458	{
1459		SYNCHRONIZED;
1460		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1461		if(resource->transfers.count(subresource))
1462			return E_FAIL;
1463		unsigned level = d3d11_subresource_to_level(resource->resource, subresource);
1464		unsigned face = d3d11_subresource_to_face(resource->resource, subresource);
1465		pipe_box box = d3d11_to_pipe_box(resource->resource, level, 0);
1466		/* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */
1467		unsigned usage = 0;
1468		if(map_type == D3D11_MAP_READ)
1469			usage = PIPE_TRANSFER_READ;
1470		else if(map_type == D3D11_MAP_WRITE)
1471			usage = PIPE_TRANSFER_WRITE;
1472		else if(map_type == D3D11_MAP_READ_WRITE)
1473			usage = PIPE_TRANSFER_READ_WRITE;
1474		else if(map_type == D3D11_MAP_WRITE_DISCARD)
1475			usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD;
1476		else if(map_type == D3D11_MAP_WRITE_NO_OVERWRITE)
1477			usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_NOOVERWRITE;
1478		else
1479			return E_INVALIDARG;
1480		if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT)
1481			usage |= PIPE_TRANSFER_DONTBLOCK;
1482		struct pipe_transfer* transfer = pipe->get_transfer(pipe, resource->resource, level, usage, &box);
1483		if(!transfer) {
1484			if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT)
1485				return DXGI_ERROR_WAS_STILL_DRAWING;
1486			else
1487				return E_FAIL;
1488		}
1489		resource->transfers[subresource] = transfer;
1490		mapped_resource->pData = pipe->transfer_map(pipe, transfer);
1491		mapped_resource->RowPitch = transfer->stride;
1492		mapped_resource->DepthPitch = transfer->layer_stride;
1493		return S_OK;
1494	}
1495
1496	virtual void STDMETHODCALLTYPE Unmap(
1497		ID3D11Resource *iresource,
1498		unsigned subresource)
1499	{
1500		SYNCHRONIZED;
1501		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1502		std::unordered_map<unsigned, pipe_transfer*>::iterator i = resource->transfers.find(subresource);
1503		if(i != resource->transfers.end())
1504		{
1505			pipe->transfer_unmap(pipe, i->second);
1506			pipe->transfer_destroy(pipe, i->second);
1507			resource->transfers.erase(i);
1508		}
1509	}
1510
1511	virtual void STDMETHODCALLTYPE CopySubresourceRegion(
1512		ID3D11Resource *dst_resource,
1513		unsigned dst_subresource,
1514		unsigned dst_x,
1515		unsigned dst_y,
1516		unsigned dst_z,
1517		ID3D11Resource *src_resource,
1518		unsigned src_subresource,
1519		const D3D11_BOX *src_box)
1520	{
1521		SYNCHRONIZED;
1522		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1523		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1524		unsigned dst_level = d3d11_subresource_to_level(dst->resource, dst_subresource);
1525		unsigned dst_face = d3d11_subresource_to_face(dst->resource, dst_subresource);
1526		unsigned src_level = d3d11_subresource_to_level(src->resource, src_subresource);
1527		unsigned src_face = d3d11_subresource_to_face(src->resource, src_subresource);
1528		/* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */
1529		pipe_box box = d3d11_to_pipe_box(src->resource, src_level, src_box);
1530		{
1531			pipe->resource_copy_region(pipe,
1532				dst->resource, dst_level, dst_x, dst_y, dst_z,
1533				src->resource, src_level, &box);
1534		}
1535	}
1536
1537	virtual void STDMETHODCALLTYPE CopyResource(
1538		ID3D11Resource *dst_resource,
1539		ID3D11Resource *src_resource)
1540	{
1541		SYNCHRONIZED;
1542		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1543		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1544		unsigned level;
1545		for(level = 0; level <= dst->resource->last_level; ++level)
1546		{
1547		        unsigned layers = 1;
1548			pipe_box box;
1549			if (dst->resource->target == PIPE_TEXTURE_CUBE)
1550				layers = 6;
1551			else if (dst->resource->target == PIPE_TEXTURE_3D)
1552				layers = u_minify(dst->resource->depth0, level);
1553			/* else layers = dst->resource->array_size; */
1554			box.x = box.y = box.z = 0;
1555			box.width = u_minify(dst->resource->width0, level);
1556			box.height = u_minify(dst->resource->height0, level);
1557			box.depth = layers;
1558			pipe->resource_copy_region(pipe,
1559						   dst->resource, level, 0, 0, 0,
1560						   src->resource, level, &box);
1561		}
1562	}
1563
1564	virtual void STDMETHODCALLTYPE UpdateSubresource(
1565		ID3D11Resource *dst_resource,
1566		unsigned dst_subresource,
1567		const D3D11_BOX *pDstBox,
1568		const void *pSrcData,
1569		unsigned src_row_pitch,
1570		unsigned src_depth_pitch)
1571	{
1572		SYNCHRONIZED;
1573		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1574		unsigned dst_level = d3d11_subresource_to_level(dst->resource, dst_subresource);
1575		/* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */
1576		pipe_box box = d3d11_to_pipe_box(dst->resource, dst_level, pDstBox);
1577		pipe->transfer_inline_write(pipe, dst->resource, dst_level, PIPE_TRANSFER_WRITE, &box, pSrcData, src_row_pitch, src_depth_pitch);
1578	}
1579
1580#if API >= 11
1581	virtual void STDMETHODCALLTYPE CopyStructureCount(
1582		ID3D11Buffer *dst_buffer,
1583		unsigned dst_aligned_byte_offset,
1584		ID3D11UnorderedAccessView *src_view)
1585	{
1586		SYNCHRONIZED;
1587	}
1588#endif
1589
1590	virtual void STDMETHODCALLTYPE ClearRenderTargetView(
1591		ID3D11RenderTargetView *render_target_view,
1592		const float color[4])
1593	{
1594		SYNCHRONIZED;
1595		GalliumD3D11RenderTargetView* view = ((GalliumD3D11RenderTargetView*)render_target_view);
1596		pipe->clear_render_target(pipe, view->object, color, 0, 0, view->object->width, view->object->height);
1597	}
1598
1599	virtual void STDMETHODCALLTYPE ClearDepthStencilView(
1600		ID3D11DepthStencilView  *depth_stencil_view,
1601		unsigned clear_flags,
1602		float depth,
1603		UINT8 stencil)
1604	{
1605		SYNCHRONIZED;
1606		GalliumD3D11DepthStencilView* view = ((GalliumD3D11DepthStencilView*)depth_stencil_view);
1607		unsigned flags = 0;
1608		if(clear_flags & D3D11_CLEAR_DEPTH)
1609			flags |= PIPE_CLEAR_DEPTH;
1610		if(clear_flags & D3D11_CLEAR_STENCIL)
1611			flags |= PIPE_CLEAR_STENCIL;
1612		pipe->clear_depth_stencil(pipe, view->object, flags, depth, stencil, 0, 0, view->object->width, view->object->height);
1613	}
1614
1615#if API >= 11
1616	virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewUint(
1617		ID3D11UnorderedAccessView *unordered_access_view,
1618		const unsigned values[4])
1619	{
1620		SYNCHRONIZED;
1621	}
1622
1623	virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewFloat(
1624			ID3D11UnorderedAccessView *unordered_access_view,
1625			const float values[4])
1626	{
1627		SYNCHRONIZED;
1628	}
1629#endif
1630
1631	void restore_gallium_state_blit_only()
1632	{
1633		pipe->bind_blend_state(pipe, blend_state.p ? blend_state.p->object : default_blend);
1634		pipe->bind_depth_stencil_alpha_state(pipe, depth_stencil_state.p ? depth_stencil_state.p->object : default_depth_stencil);
1635		pipe->bind_rasterizer_state(pipe, rasterizer_state.p ? rasterizer_state.p->object : default_rasterizer);
1636		pipe->bind_vertex_elements_state(pipe, input_layout.p ? input_layout.p->object : default_input_layout);
1637		pipe->bind_fs_state(pipe, shaders[D3D11_STAGE_PS].p ? shaders[D3D11_STAGE_PS].p->object : default_shaders[PIPE_SHADER_FRAGMENT]);
1638		pipe->bind_vs_state(pipe, shaders[D3D11_STAGE_VS].p ? shaders[D3D11_STAGE_VS].p->object : default_shaders[PIPE_SHADER_VERTEX]);
1639		if(caps.gs)
1640			pipe->bind_gs_state(pipe, shaders[D3D11_STAGE_GS].p ? shaders[D3D11_STAGE_GS].p->object : default_shaders[PIPE_SHADER_GEOMETRY]);
1641		set_framebuffer();
1642		set_viewport();
1643		set_clip();
1644		set_render_condition();
1645		// TODO: restore stream output
1646
1647		update_flags |= UPDATE_VERTEX_BUFFERS | (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_PS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_PS));
1648	}
1649
1650	virtual void STDMETHODCALLTYPE RestoreGalliumStateBlitOnly()
1651	{
1652		SYNCHRONIZED;
1653		restore_gallium_state_blit_only();
1654	}
1655
1656	virtual void STDMETHODCALLTYPE GenerateMips(
1657		ID3D11ShaderResourceView *shader_resource_view)
1658	{
1659		SYNCHRONIZED;
1660
1661		GalliumD3D11ShaderResourceView* view = (GalliumD3D11ShaderResourceView*)shader_resource_view;
1662		if(caps.gs)
1663			pipe->bind_gs_state(pipe, 0);
1664		if(caps.so)
1665			pipe->bind_stream_output_state(pipe, 0);
1666		if(pipe->render_condition)
1667			pipe->render_condition(pipe, 0, 0);
1668		util_gen_mipmap(gen_mipmap, view->object, 0, 0, view->object->texture->last_level, PIPE_TEX_FILTER_LINEAR);
1669		restore_gallium_state_blit_only();
1670	}
1671
1672	virtual void STDMETHODCALLTYPE RestoreGalliumState()
1673	{
1674		SYNCHRONIZED;
1675		restore_gallium_state_blit_only();
1676
1677		set_index_buffer();
1678		set_stencil_ref();
1679		pipe->set_blend_color(pipe, (struct pipe_blend_color*)blend_color);
1680		pipe->set_sample_mask(pipe, sample_mask);
1681
1682		for(unsigned s = 0; s < 3; ++s)
1683		{
1684			unsigned num = std::min(caps.constant_buffers[s], (unsigned)D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
1685			for(unsigned i = 0; i < num; ++i)
1686				pipe->set_constant_buffer(pipe, s, i, constant_buffers[s][i].p ? constant_buffers[s][i].p->resource : 0);
1687		}
1688
1689		if(caps.so)
1690			pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1691
1692		update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_VS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_VS));
1693		update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_GS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_GS));
1694
1695		set_scissor();
1696	}
1697
1698#if API >= 11
1699	/* TODO: hack SRVs or sampler states to handle this, or add to Gallium */
1700	virtual void STDMETHODCALLTYPE SetResourceMinLOD(
1701		ID3D11Resource *iresource,
1702		float min_lod)
1703	{
1704		SYNCHRONIZED;
1705		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1706		if(resource->min_lod != min_lod)
1707		{
1708			// TODO: actually do anything?
1709			resource->min_lod = min_lod;
1710		}
1711	}
1712
1713	virtual float STDMETHODCALLTYPE GetResourceMinLOD(
1714		ID3D11Resource *iresource)
1715	{
1716		SYNCHRONIZED;
1717		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1718		return resource->min_lod;
1719	}
1720#endif
1721
1722	virtual void STDMETHODCALLTYPE ResolveSubresource(
1723		ID3D11Resource *dst_resource,
1724		unsigned dst_subresource,
1725		ID3D11Resource *src_resource,
1726		unsigned src_subresource,
1727		DXGI_FORMAT format)
1728	{
1729		SYNCHRONIZED;
1730		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1731		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1732		struct pipe_resolve_info info;
1733
1734		info.dst.res = dst->resource;
1735		info.src.res = src->resource;
1736		info.dst.level = 0;
1737		info.dst.layer = d3d11_subresource_to_face(dst->resource, dst_subresource);
1738		info.src.layer = d3d11_subresource_to_face(src->resource, src_subresource);
1739
1740		info.src.x0 = 0;
1741		info.src.x1 = info.src.res->width0;
1742		info.src.y0 = 0;
1743		info.src.y1 = info.src.res->height0;
1744		info.dst.x0 = 0;
1745		info.dst.x1 = info.dst.res->width0;
1746		info.dst.y0 = 0;
1747		info.dst.y1 = info.dst.res->height0;
1748
1749		info.mask = PIPE_MASK_RGBA | PIPE_MASK_ZS;
1750
1751		pipe->resource_resolve(pipe, &info);
1752	}
1753
1754#if API >= 11
1755	virtual void STDMETHODCALLTYPE ExecuteCommandList(
1756		ID3D11CommandList *command_list,
1757		BOOL restore_context_state)
1758	{
1759		SYNCHRONIZED;
1760	}
1761
1762	virtual HRESULT STDMETHODCALLTYPE FinishCommandList(
1763		BOOL restore_deferred_context_state,
1764		ID3D11CommandList **out_command_list)
1765	{
1766		SYNCHRONIZED;
1767		return E_NOTIMPL;
1768	}
1769#endif
1770
1771	virtual void STDMETHODCALLTYPE ClearState(void)
1772	{
1773		/* we don't take a lock here because we would deadlock otherwise
1774		 * TODO: this is probably incorrect, because ClearState should likely be atomic.
1775		 * However, I can't think of any correct usage that would be affected by this
1776		 * being non-atomic, and making this atomic is quite expensive and complicates
1777		 * the code
1778		 */
1779
1780		// we qualify all calls so that we avoid virtual dispatch and might get them inlined
1781		// TODO: make sure all this gets inlined, which might require more compiler flags
1782		// TODO: optimize this
1783#if API >= 11
1784		GalliumD3D11DeviceContext::PSSetShader(0, 0, 0);
1785		GalliumD3D11DeviceContext::GSSetShader(0, 0, 0);
1786		GalliumD3D11DeviceContext::VSSetShader(0, 0, 0);
1787		GalliumD3D11DeviceContext::HSSetShader(0, 0, 0);
1788		GalliumD3D11DeviceContext::DSSetShader(0, 0, 0);
1789		GalliumD3D11DeviceContext::CSSetShader(0, 0, 0);
1790#else
1791		GalliumD3D11DeviceContext::PSSetShader(0);
1792		GalliumD3D11DeviceContext::GSSetShader(0);
1793		GalliumD3D11DeviceContext::VSSetShader(0);
1794#endif
1795
1796		GalliumD3D11DeviceContext::IASetInputLayout(0);
1797		GalliumD3D11DeviceContext::IASetIndexBuffer(0, DXGI_FORMAT_UNKNOWN, 0);
1798		GalliumD3D11DeviceContext::RSSetState(0);
1799		GalliumD3D11DeviceContext::OMSetDepthStencilState(0, 0);
1800		GalliumD3D11DeviceContext::OMSetBlendState(0, (float*)zero_data, ~0);
1801		GalliumD3D11DeviceContext::SetPredication(0, 0);
1802		GalliumD3D11DeviceContext::IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_UNDEFINED);
1803
1804		GalliumD3D11DeviceContext::PSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1805		GalliumD3D11DeviceContext::GSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1806		GalliumD3D11DeviceContext::VSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1807#if API >= 11
1808		GalliumD3D11DeviceContext::HSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1809		GalliumD3D11DeviceContext::DSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1810		GalliumD3D11DeviceContext::CSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1811#endif
1812
1813		GalliumD3D11DeviceContext::IASetVertexBuffers(0, num_vertex_buffers, (ID3D11Buffer**)zero_data, (unsigned*)zero_data, (unsigned*)zero_data);
1814#if API >= 11
1815		GalliumD3D11DeviceContext::OMSetRenderTargetsAndUnorderedAccessViews(0, 0, 0 , 0, 0, 0, 0);
1816#else
1817		GalliumD3D11DeviceContext::OMSetRenderTargets(0, 0, 0 );
1818#endif
1819		GalliumD3D11DeviceContext::SOSetTargets(0, 0, 0);
1820
1821		GalliumD3D11DeviceContext::PSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11ShaderResourceView**)zero_data);
1822		GalliumD3D11DeviceContext::GSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11ShaderResourceView**)zero_data);
1823		GalliumD3D11DeviceContext::VSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11ShaderResourceView**)zero_data);
1824#if API >= 11
1825		GalliumD3D11DeviceContext::HSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11ShaderResourceView**)zero_data);
1826		GalliumD3D11DeviceContext::DSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11ShaderResourceView**)zero_data);
1827		GalliumD3D11DeviceContext::CSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11ShaderResourceView**)zero_data);
1828#endif
1829
1830		GalliumD3D11DeviceContext::PSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11SamplerState**)zero_data);
1831		GalliumD3D11DeviceContext::GSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11SamplerState**)zero_data);
1832		GalliumD3D11DeviceContext::VSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11SamplerState**)zero_data);
1833#if API >= 11
1834		GalliumD3D11DeviceContext::HSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11SamplerState**)zero_data);
1835		GalliumD3D11DeviceContext::DSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11SamplerState**)zero_data);
1836		GalliumD3D11DeviceContext::CSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11SamplerState**)zero_data);
1837#endif
1838
1839		GalliumD3D11DeviceContext::RSSetViewports(0, 0);
1840		GalliumD3D11DeviceContext::RSSetScissorRects(0, 0);
1841	}
1842
1843	virtual void STDMETHODCALLTYPE Flush(void)
1844	{
1845		SYNCHRONIZED;
1846                pipe->flush(pipe, 0);
1847	}
1848
1849	/* In Direct3D 10, if the reference count of an object drops to 0, it is automatically
1850	 * cleanly unbound from the pipeline.
1851	 * In Direct3D 11, the pipeline holds a reference.
1852	 *
1853	 * Note that instead of always scanning the pipeline on destruction, we could
1854	 * maintain the internal reference count on DirectX 10 and use it to check if an
1855	 * object is still bound.
1856	 * Presumably, on average, scanning is faster if the application is well written.
1857	 */
1858#if API < 11
1859#define IMPLEMENT_SIMPLE_UNBIND(name, member, gallium, def) \
1860	void Unbind##name(ID3D11##name* state) \
1861	{ \
1862		SYNCHRONIZED; \
1863		if((void*)state == (void*)member.p) \
1864		{ \
1865			member.p = 0; \
1866			pipe->bind_##gallium##_state(pipe, default_##def); \
1867		} \
1868	}
1869	IMPLEMENT_SIMPLE_UNBIND(BlendState, blend_state, blend, blend)
1870	IMPLEMENT_SIMPLE_UNBIND(RasterizerState, rasterizer_state, rasterizer, rasterizer)
1871	IMPLEMENT_SIMPLE_UNBIND(DepthStencilState, depth_stencil_state, depth_stencil_alpha, depth_stencil)
1872	IMPLEMENT_SIMPLE_UNBIND(InputLayout, input_layout, vertex_elements, input_layout)
1873	IMPLEMENT_SIMPLE_UNBIND(PixelShader, shaders[D3D11_STAGE_PS], fs, shaders[D3D11_STAGE_PS])
1874	IMPLEMENT_SIMPLE_UNBIND(VertexShader, shaders[D3D11_STAGE_VS], vs, shaders[D3D11_STAGE_VS])
1875	IMPLEMENT_SIMPLE_UNBIND(GeometryShader, shaders[D3D11_STAGE_GS], gs, shaders[D3D11_STAGE_GS])
1876
1877	void UnbindPredicate(ID3D11Predicate* predicate)
1878	{
1879		SYNCHRONIZED;
1880		if(predicate == render_predicate)
1881		{
1882			render_predicate.p = NULL;
1883			render_predicate_value = 0;
1884			pipe->render_condition(pipe, 0, 0);
1885		}
1886	}
1887
1888	void UnbindSamplerState(ID3D11SamplerState* state)
1889	{
1890		SYNCHRONIZED;
1891		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1892		{
1893			for(unsigned i = 0; i < num_samplers[s]; ++i)
1894			{
1895				if(samplers[s][i] == state)
1896				{
1897					samplers[s][i].p = NULL;
1898					sampler_csos[s].v[i] = NULL;
1899					update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s));
1900				}
1901			}
1902		}
1903	}
1904
1905	void UnbindBuffer(ID3D11Buffer* buffer)
1906	{
1907		SYNCHRONIZED;
1908		if(buffer == index_buffer)
1909		{
1910			index_buffer.p = 0;
1911			index_format = DXGI_FORMAT_UNKNOWN;
1912			index_offset = 0;
1913			struct pipe_index_buffer ib;
1914			memset(&ib, 0, sizeof(ib));
1915			pipe->set_index_buffer(pipe, &ib);
1916		}
1917
1918		for(unsigned i = 0; i < num_vertex_buffers; ++i)
1919		{
1920			if(buffer == input_buffers[i])
1921			{
1922				input_buffers[i].p = 0;
1923				memset(&vertex_buffers[num_vertex_buffers], 0, sizeof(vertex_buffers[num_vertex_buffers]));
1924				update_flags |= UPDATE_VERTEX_BUFFERS;
1925			}
1926		}
1927
1928		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1929		{
1930			for(unsigned i = 0; i < sizeof(constant_buffers) / sizeof(constant_buffers[0]); ++i)
1931			{
1932				if(constant_buffers[s][i] == buffer)
1933				{
1934					constant_buffers[s][i] = (ID3D10Buffer*)NULL;
1935					pipe->set_constant_buffer(pipe, s, i, NULL);
1936				}
1937			}
1938		}
1939	}
1940
1941	void UnbindDepthStencilView(ID3D11DepthStencilView * view)
1942	{
1943		SYNCHRONIZED;
1944		if(view == depth_stencil_view)
1945		{
1946			depth_stencil_view.p = NULL;
1947			set_framebuffer();
1948		}
1949	}
1950
1951	void UnbindRenderTargetView(ID3D11RenderTargetView* view)
1952	{
1953		SYNCHRONIZED;
1954		bool any_bound = false;
1955		for(unsigned i = 0; i < num_render_target_views; ++i)
1956		{
1957			if(render_target_views[i] == view)
1958			{
1959				render_target_views[i].p = NULL;
1960				any_bound = true;
1961			}
1962		}
1963		if(any_bound)
1964			set_framebuffer();
1965	}
1966
1967	void UnbindShaderResourceView(ID3D11ShaderResourceView* view)
1968	{
1969		SYNCHRONIZED;
1970		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1971		{
1972			for(unsigned i = 0; i < num_shader_resource_views[s]; ++i)
1973			{
1974				if(shader_resource_views[s][i] == view)
1975				{
1976					shader_resource_views[s][i].p = NULL;
1977					sampler_views[s][i] = NULL;
1978					update_flags |= (1 << (UPDATE_VIEWS_SHIFT + s));
1979				}
1980			}
1981		}
1982	}
1983#endif
1984
1985#undef SYNCHRONIZED
1986};
1987
1988#if API >= 11
1989/* This approach serves two purposes.
1990 * First, we don't want to do an atomic operation to manipulate the reference
1991 * count every time something is bound/unbound to the pipeline, since they are
1992 * expensive.
1993 * Fortunately, the immediate context can only be used by a single thread, so
1994 * we don't have to use them, as long as a separate reference count is used
1995 * (see dual_refcnt_t).
1996 *
1997 * Second, we want to avoid the Device -> DeviceContext -> bound DeviceChild -> Device
1998 * garbage cycle.
1999 * To avoid it, DeviceChild doesn't hold a reference to Device as usual, but adds
2000 * one for each external reference count, while internal nonatomic_add_ref doesn't
2001 * add any.
2002 *
 * Note that ideally we would like to eliminate the non-atomic op too, but this is more
2004 * complicated, since we would either need to use garbage collection and give up
2005 * deterministic destruction (especially bad for large textures), or scan the whole
2006 * pipeline state every time the reference count of object drops to 0, which risks
2007 * pathological slowdowns.
2008 *
2009 * Since this microoptimization should matter relatively little, let's avoid it for now.
2010 *
 * Note that deferred contexts don't use this, since as a whole, they must be thread-safe.
2012 * Eliminating the atomic ops for deferred contexts seems substantially harder.
2013 * This might be a problem if they are used in a one-shot multithreaded rendering
2014 * fashion, where SMP cacheline bouncing on the reference count may be visible.
2015 *
2016 * The idea would be to attach a structure of reference counts indexed by deferred
2017 * context id to each object. Ideally, this should be organized like ext2 block pointers.
2018 *
2019 * Every deferred context would get a reference count in its own cacheline.
2020 * The external count is protected by a lock bit, and there is also a "lock bit" in each
2021 * internal count.
2022 *
2023 * When the external count has to be dropped to 0, the lock bit is taken and all internal
2024 * reference counts are scanned, taking a count of them. A flag would also be set on them.
2025 * Deferred context manipulation would notice the flag, and update the count.
2026 * Once the count goes to zero, the object is freed.
2027 *
 * The problem with this is that if the external reference count ping-pongs between
2029 * zero and non-zero, the scans will take a lot of time.
2030 *
2031 * The idea to solve this is to compute the scans in a binary-tree like fashion, where
2032 * each binary tree node would have a "determined bit", which would be invalidated
2033 * by manipulations.
2034 *
2035 * However, all this complexity might actually be a loss in most cases, so let's just
2036 * stick to a single atomic refcnt for now.
2037 *
2038 * Also, we don't even support deferred contexts yet, so this can wait.
2039 */
2040struct nonatomic_device_child_ptr_traits
2041{
2042	static void add_ref(void* p)
2043	{
2044		if(p)
2045			((GalliumD3D11DeviceChild<>*)p)->nonatomic_add_ref();
2046	}
2047
2048	static void release(void* p)
2049	{
2050		if(p)
2051			((GalliumD3D11DeviceChild<>*)p)->nonatomic_release();
2052	}
2053};
2054
2055struct GalliumD3D11ImmediateDeviceContext
2056	: public GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>
2057{
2058	GalliumD3D11ImmediateDeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, unsigned context_flags = 0)
2059	: GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>(device, pipe, context_flags)
2060	{
2061		// not necessary, but tests that the API at least basically works
2062		ClearState();
2063	}
2064
2065	/* we do this since otherwise we would have a garbage cycle between this and the device */
2066	virtual ULONG STDMETHODCALLTYPE AddRef()
2067	{
2068		return this->device->AddRef();
2069	}
2070
2071	virtual ULONG STDMETHODCALLTYPE Release()
2072	{
2073		return this->device->Release();
2074	}
2075
2076	virtual D3D11_DEVICE_CONTEXT_TYPE STDMETHODCALLTYPE GetType()
2077	{
2078		return D3D11_DEVICE_CONTEXT_IMMEDIATE;
2079	}
2080};
2081
2082static ID3D11DeviceContext* GalliumD3D11ImmediateDeviceContext_Create(GalliumD3D11Screen* device, struct pipe_context* pipe, bool owns_pipe)
2083{
2084	return new GalliumD3D11ImmediateDeviceContext(device, pipe, owns_pipe);
2085}
2086
2087static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumState(ID3D11DeviceContext* context)
2088{
2089	((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumState();
2090}
2091
2092static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumStateBlitOnly(ID3D11DeviceContext* context)
2093{
2094	((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumStateBlitOnly();
2095}
2096
2097static void GalliumD3D11ImmediateDeviceContext_Destroy(ID3D11DeviceContext* context)
2098{
2099	delete (GalliumD3D11ImmediateDeviceContext*)context;
2100}
2101#endif
2102