d3d11_context.h revision 3d13b081c72626acba6b9a25baec740c60dfafe1
1/**************************************************************************
2 *
3 * Copyright 2010 Luca Barbieri
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
/* All-null pointer array used to unbind state; 128 entries are needed to cover the shader resource slots. */
28static const void* zero_data[128];
29
30#define UPDATE_VIEWS_SHIFT (D3D11_STAGES * 0)
31#define UPDATE_SAMPLERS_SHIFT (D3D11_STAGES * 1)
32#define UPDATE_VERTEX_BUFFERS (1 << (D3D11_STAGES * 2))
33
34#if API >= 11
35template<typename PtrTraits>
36struct GalliumD3D11DeviceContext :
37	public GalliumD3D11DeviceChild<ID3D11DeviceContext>
38{
39#else
40template<bool threadsafe>
41struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe>
42{
43	typedef simple_ptr_traits PtrTraits;
44	typedef GalliumD3D10Device GalliumD3D10DeviceContext;
45#endif
46
47	refcnt_ptr<GalliumD3D11Shader<>, PtrTraits> shaders[D3D11_STAGES];
48	refcnt_ptr<GalliumD3D11InputLayout, PtrTraits> input_layout;
49	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> index_buffer;
50	refcnt_ptr<GalliumD3D11RasterizerState, PtrTraits> rasterizer_state;
51	refcnt_ptr<GalliumD3D11DepthStencilState, PtrTraits> depth_stencil_state;
52	refcnt_ptr<GalliumD3D11BlendState, PtrTraits> blend_state;
53	refcnt_ptr<GalliumD3D11DepthStencilView, PtrTraits> depth_stencil_view;
54	refcnt_ptr<GalliumD3D11Predicate, PtrTraits> render_predicate;
55
56	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> constant_buffers[D3D11_STAGES][D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT];
57	refcnt_ptr<GalliumD3D11ShaderResourceView, PtrTraits> shader_resource_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
58	refcnt_ptr<GalliumD3D11SamplerState, PtrTraits> samplers[D3D11_STAGES][D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
59	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> input_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
60	refcnt_ptr<GalliumD3D11RenderTargetView, PtrTraits> render_target_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
61	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> so_targets[D3D11_SO_BUFFER_SLOT_COUNT];
62
63#if API >= 11
64	refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> cs_unordered_access_views[D3D11_PS_CS_UAV_REGISTER_COUNT];
65	refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> om_unordered_access_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
66#endif
67
68	D3D11_VIEWPORT viewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
69	D3D11_RECT scissor_rects[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
70	unsigned so_offsets[D3D11_SO_BUFFER_SLOT_COUNT];
71	D3D11_PRIMITIVE_TOPOLOGY primitive_topology;
72	DXGI_FORMAT index_format;
73	unsigned index_offset;
74	uint32_t strip_cut_index;
75	BOOL render_predicate_value;
76	float blend_color[4];
77	unsigned sample_mask;
78	unsigned stencil_ref;
79	bool depth_clamp;
80
81	void* default_input_layout;
82	void* default_rasterizer;
83	void* default_depth_stencil;
84	void* default_blend;
85	void* default_sampler;
86	void* ld_sampler;
87	void * default_shaders[D3D11_STAGES];
88
89	// derived state
90	int primitive_mode;
91	struct pipe_vertex_buffer vertex_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
92	struct pipe_resource* so_buffers[D3D11_SO_BUFFER_SLOT_COUNT];
93	struct pipe_sampler_view* sampler_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
94	struct
95	{
96		void* ld; // accessed with a -1 index from v
97		void* v[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
98	} sampler_csos[D3D11_STAGES];
99	struct pipe_resource * buffers[D3D11_SO_BUFFER_SLOT_COUNT];
100	unsigned num_shader_resource_views[D3D11_STAGES];
101	unsigned num_samplers[D3D11_STAGES];
102	unsigned num_vertex_buffers;
103	unsigned num_render_target_views;
104	unsigned num_viewports;
105	unsigned num_scissor_rects;
106	unsigned num_so_targets;
107
108	struct pipe_context* pipe;
109	unsigned update_flags;
110
111	bool owns_pipe;
112	unsigned context_flags;
113
114	GalliumD3D11Caps caps;
115
116	cso_context* cso_ctx;
117	gen_mipmap_state* gen_mipmap;
118
119#if API >= 11
120#define SYNCHRONIZED do {} while(0)
121
122	GalliumD3D11DeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, bool owns_pipe, unsigned context_flags = 0)
123	: GalliumD3D11DeviceChild<ID3D11DeviceContext>(device), pipe(pipe), owns_pipe(owns_pipe), context_flags(context_flags)
124	{
125		caps = device->screen_caps;
126		init_context();
127	}
128
129	~GalliumD3D11DeviceContext()
130	{
131		destroy_context();
132	}
133#else
134#define SYNCHRONIZED lock_t<maybe_mutex_t<threadsafe> > lock_(this->mutex)
135
136	GalliumD3D10Device(pipe_screen* screen, pipe_context* pipe, bool owns_pipe, unsigned creation_flags, IDXGIAdapter* adapter)
137	: GalliumD3D10ScreenImpl<threadsafe>(screen, pipe, owns_pipe, creation_flags, adapter), pipe(pipe), owns_pipe(owns_pipe), context_flags(0)
138	{
139		caps = this->screen_caps;
140		init_context();
141	}
142
143	~GalliumD3D10Device()
144	{
145		destroy_context();
146	}
147#endif
148
149	void init_context()
150	{
151		if(!pipe->begin_query)
152			caps.queries = false;
153		if(!pipe->render_condition || !screen->get_param(screen, PIPE_CAP_CONDITIONAL_RENDER))
154			caps.render_condition = false;
155		if(!pipe->bind_gs_state)
156		{
157			caps.gs = false;
158			caps.stages = 2;
159		}
160		if(!pipe->set_stream_output_buffers)
161			caps.so = false;
162		if(!pipe->set_geometry_sampler_views)
163			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_GEOMETRY);
164		if(!pipe->set_fragment_sampler_views)
165			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_FRAGMENT);
166		if(!pipe->set_vertex_sampler_views)
167			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_VERTEX);
168
169		update_flags = 0;
170
171		// pipeline state
172		memset(viewports, 0, sizeof(viewports));
173		memset(scissor_rects, 0, sizeof(scissor_rects));
174		memset(so_offsets, 0, sizeof(so_offsets));
175		primitive_topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
176		index_format = DXGI_FORMAT_UNKNOWN;
177		index_offset = 0;
178		strip_cut_index = 0xffffffff;
179		render_predicate_value = 0;
180		memset(blend_color, 0, sizeof(blend_color));
181		sample_mask = ~0;
182		stencil_ref = 0;
183		depth_clamp = 0;
184
185		// derived state
186		primitive_mode = 0;
187		memset(vertex_buffers, 0, sizeof(vertex_buffers));
188		memset(so_buffers, 0, sizeof(so_buffers));
189		memset(sampler_views, 0, sizeof(sampler_views));
190		memset(sampler_csos, 0, sizeof(sampler_csos));
191		memset(num_shader_resource_views, 0, sizeof(num_shader_resource_views));
192		memset(num_samplers, 0, sizeof(num_samplers));
193		num_vertex_buffers = 0;
194		num_render_target_views = 0;
195		num_viewports = 0;
196		num_scissor_rects = 0;
197		num_so_targets = 0;
198
199		default_input_layout = pipe->create_vertex_elements_state(pipe, 0, 0);
200
201		struct pipe_rasterizer_state rasterizerd;
202		memset(&rasterizerd, 0, sizeof(rasterizerd));
203		rasterizerd.gl_rasterization_rules = 1;
204		rasterizerd.cull_face = PIPE_FACE_BACK;
205		rasterizerd.flatshade_first = 1;
206		rasterizerd.line_width = 1.0f;
207		rasterizerd.point_size = 1.0f;
208		default_rasterizer = pipe->create_rasterizer_state(pipe, &rasterizerd);
209
210		struct pipe_depth_stencil_alpha_state depth_stencild;
211		memset(&depth_stencild, 0, sizeof(depth_stencild));
212		depth_stencild.depth.enabled = TRUE;
213		depth_stencild.depth.writemask = 1;
214		depth_stencild.depth.func = PIPE_FUNC_LESS;
215		default_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &depth_stencild);
216
217		struct pipe_blend_state blendd;
218		memset(&blendd, 0, sizeof(blendd));
219		blendd.rt[0].colormask = 0xf;
220		default_blend = pipe->create_blend_state(pipe, &blendd);
221
222		struct pipe_sampler_state samplerd;
223		memset(&samplerd, 0, sizeof(samplerd));
224		samplerd.normalized_coords = 1;
225		samplerd.min_img_filter = PIPE_TEX_FILTER_LINEAR;
226		samplerd.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
227		samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR;
228		samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
229		samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
230		samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
231		samplerd.border_color.f[0] = 1.0f;
232		samplerd.border_color.f[1] = 1.0f;
233		samplerd.border_color.f[2] = 1.0f;
234		samplerd.border_color.f[3] = 1.0f;
235		samplerd.min_lod = -FLT_MAX;
236		samplerd.max_lod = FLT_MAX;
237		samplerd.max_anisotropy = 1;
238		default_sampler = pipe->create_sampler_state(pipe, &samplerd);
239
240		memset(&samplerd, 0, sizeof(samplerd));
241		samplerd.normalized_coords = 0;
242		samplerd.min_img_filter = PIPE_TEX_FILTER_NEAREST;
243		samplerd.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
244		samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
245		samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
246		samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
247		samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
248		samplerd.min_lod = -FLT_MAX;
249		samplerd.max_lod = FLT_MAX;
250		samplerd.max_anisotropy = 1;
251		ld_sampler = pipe->create_sampler_state(pipe, &samplerd);
252
253		for(unsigned s = 0; s < D3D11_STAGES; ++s)
254		{
255			sampler_csos[s].ld = ld_sampler;
256			for(unsigned i = 0; i < D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; ++i)
257				sampler_csos[s].v[i] = default_sampler;
258		}
259
260		// TODO: should this really be empty shaders, or should they be all-passthrough?
261		memset(default_shaders, 0, sizeof(default_shaders));
262		struct ureg_program *ureg;
263		ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
264		ureg_END(ureg);
265		default_shaders[PIPE_SHADER_FRAGMENT] = ureg_create_shader_and_destroy(ureg, pipe);
266
267		ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
268		ureg_END(ureg);
269		default_shaders[PIPE_SHADER_VERTEX] = ureg_create_shader_and_destroy(ureg, pipe);
270
271		cso_ctx = cso_create_context(pipe);
272		gen_mipmap = util_create_gen_mipmap(pipe, cso_ctx);
273
274		RestoreGalliumState();
275	}
276
277	void destroy_context()
278	{
279		util_destroy_gen_mipmap(gen_mipmap);
280		cso_destroy_context(cso_ctx);
281
282		pipe->bind_vertex_elements_state(pipe, 0);
283		pipe->delete_vertex_elements_state(pipe, default_input_layout);
284
285		pipe->bind_rasterizer_state(pipe, 0);
286		pipe->delete_rasterizer_state(pipe, default_rasterizer);
287
288		pipe->bind_depth_stencil_alpha_state(pipe, 0);
289		pipe->delete_depth_stencil_alpha_state(pipe, default_depth_stencil);
290
291		pipe->bind_blend_state(pipe, 0);
292		pipe->delete_blend_state(pipe, default_blend);
293
294		pipe->bind_fragment_sampler_states(pipe, 0, 0);
295		pipe->bind_vertex_sampler_states(pipe, 0, 0);
296		if(pipe->bind_geometry_sampler_states)
297			pipe->bind_geometry_sampler_states(pipe, 0, 0);
298		pipe->delete_sampler_state(pipe, default_sampler);
299		pipe->delete_sampler_state(pipe, ld_sampler);
300
301		pipe->bind_fs_state(pipe, 0);
302		pipe->delete_fs_state(pipe, default_shaders[PIPE_SHADER_FRAGMENT]);
303
304		pipe->bind_vs_state(pipe, 0);
305		pipe->delete_vs_state(pipe, default_shaders[PIPE_SHADER_VERTEX]);
306
307		if(owns_pipe)
308			pipe->destroy(pipe);
309	}
310
311	virtual unsigned STDMETHODCALLTYPE GetContextFlags(void)
312	{
313		return context_flags;
314	}
315#if API >= 11
316#define SET_SHADER_EXTRA_ARGS , \
317	ID3D11ClassInstance *const *ppClassInstances, \
318	unsigned count
319#define GET_SHADER_EXTRA_ARGS , \
320		ID3D11ClassInstance **ppClassInstances, \
321		unsigned *out_count
322#else
323#define SET_SHADER_EXTRA_ARGS
324#define GET_SHADER_EXTRA_ARGS
325#endif
326
327/* On Windows D3D11, SetConstantBuffers and SetShaderResources crash if passed a null pointer.
328 * Instead, you have to pass a pointer to nulls to unbind things.
329 * We do the same.
330 * TODO: is D3D10 the same?
331 */
332	template<unsigned s>
333	void xs_set_shader(GalliumD3D11Shader<>* shader)
334	{
335		if(shader != shaders[s].p)
336		{
337			shaders[s] = shader;
338			void* shader_cso = shader ? shader->object : default_shaders[s];
339			switch(s)
340			{
341			case PIPE_SHADER_VERTEX:
342				pipe->bind_vs_state(pipe, shader_cso);
343				break;
344			case PIPE_SHADER_FRAGMENT:
345				pipe->bind_fs_state(pipe, shader_cso);
346				break;
347			case PIPE_SHADER_GEOMETRY:
348				pipe->bind_gs_state(pipe, shader_cso);
349				break;
350			}
351			update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s)) | (1 << (UPDATE_VIEWS_SHIFT + s));
352		}
353	}
354
355	template<unsigned s>
356	void xs_set_constant_buffers(unsigned start, unsigned count, GalliumD3D11Buffer *const *constbufs)
357	{
358		for(unsigned i = 0; i < count; ++i)
359		{
360			if(constbufs[i] != constant_buffers[s][start + i].p)
361			{
362				constant_buffers[s][start + i] = constbufs[i];
363				if(s < caps.stages && start + i < caps.constant_buffers[s])
364					pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL);
365			}
366		}
367	}
368
369	template<unsigned s>
370	void xs_set_shader_resources(unsigned start, unsigned count, GalliumD3D11ShaderResourceView *const *srvs)
371	{
372		int last_different = -1;
373		for(unsigned i = 0; i < count; ++i)
374		{
375			if(shader_resource_views[s][start + i].p != srvs[i])
376			{
377				shader_resource_views[s][start + i] = srvs[i];
378				sampler_views[s][start + i] = srvs[i] ? srvs[i]->object : 0;
379				last_different = i;
380			}
381		}
382		if(last_different >= 0)
383		{
384			num_shader_resource_views[s] = std::max(num_shader_resource_views[s], start + last_different + 1);
385			update_flags |= 1 << (UPDATE_VIEWS_SHIFT + s);
386		}
387	}
388
389	template<unsigned s>
390	void xs_set_samplers(unsigned start, unsigned count, GalliumD3D11SamplerState *const *samps)
391	{
392		int last_different = -1;
393		for(unsigned i = 0; i < count; ++i)
394		{
395			if(samplers[s][start + i].p != samps[i])
396			{
397				samplers[s][start + i] = samps[i];
398				sampler_csos[s].v[start + i] = samps[i] ? samps[i]->object : default_sampler;
399				last_different = i;
400			}
401			if(last_different >= 0)
402			{
403				num_samplers[s] = std::max(num_samplers[s], start + last_different + 1);
404				update_flags |= 1 << (UPDATE_SAMPLERS_SHIFT + s);
405			}
406		}
407	}
408
/* Generates the eight per-stage entry points (Set/Get for Shader,
 * ConstantBuffers, ShaderResources and Samplers) for stage prefix XS
 * (VS, GS, PS, ...), whose shader interface is ID3D11<Stage>Shader.
 * Setters forward to the xs_set_* templates; getters return the currently
 * stored objects through ref(). Every method takes the SYNCHRONIZED lock.
 */
#define IMPLEMENT_SHADER_STAGE(XS, Stage) \
	virtual void STDMETHODCALLTYPE XS##SetShader( \
		ID3D11##Stage##Shader *pShader \
		SET_SHADER_EXTRA_ARGS) \
	{ \
		SYNCHRONIZED; \
		GalliumD3D11Shader<>* new_shader = (GalliumD3D11Shader<>*)pShader; \
		xs_set_shader<D3D11_STAGE_##XS>(new_shader); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetShader( \
		ID3D11##Stage##Shader **ppShader \
		GET_SHADER_EXTRA_ARGS) \
	{ \
		SYNCHRONIZED; \
		*ppShader = (ID3D11##Stage##Shader*)shaders[D3D11_STAGE_##XS].ref(); \
	} \
	virtual void STDMETHODCALLTYPE XS##SetConstantBuffers( \
		unsigned start, \
		unsigned count, \
		ID3D11Buffer *const* constant_buffers) \
	{ \
		SYNCHRONIZED; \
		GalliumD3D11Buffer *const *bufs = (GalliumD3D11Buffer *const *)constant_buffers; \
		xs_set_constant_buffers<D3D11_STAGE_##XS>(start, count, bufs); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetConstantBuffers( \
		unsigned start, \
		unsigned count, \
		ID3D11Buffer **out_constant_buffers) \
	{ \
		SYNCHRONIZED; \
		for(unsigned i = 0; i < count; ++i) \
		{ \
			out_constant_buffers[i] = constant_buffers[D3D11_STAGE_##XS][start + i].ref(); \
		} \
	} \
	virtual void STDMETHODCALLTYPE XS##SetShaderResources( \
		unsigned start, \
		unsigned count, \
		ID3D11ShaderResourceView *const *new_shader_resource_views) \
	{ \
		SYNCHRONIZED; \
		GalliumD3D11ShaderResourceView *const *srvs = (GalliumD3D11ShaderResourceView *const *)new_shader_resource_views; \
		xs_set_shader_resources<D3D11_STAGE_##XS>(start, count, srvs); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetShaderResources( \
		unsigned start, \
		unsigned count, \
		ID3D11ShaderResourceView **out_shader_resource_views) \
	{ \
		SYNCHRONIZED; \
		for(unsigned i = 0; i < count; ++i) \
		{ \
			out_shader_resource_views[i] = shader_resource_views[D3D11_STAGE_##XS][start + i].ref(); \
		} \
	} \
	virtual void STDMETHODCALLTYPE XS##SetSamplers( \
		unsigned start, \
		unsigned count, \
		ID3D11SamplerState *const *new_samplers) \
	{ \
		SYNCHRONIZED; \
		GalliumD3D11SamplerState *const *samps = (GalliumD3D11SamplerState *const *)new_samplers; \
		xs_set_samplers<D3D11_STAGE_##XS>(start, count, samps); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetSamplers( \
		unsigned start, \
		unsigned count, \
		ID3D11SamplerState **out_samplers) \
	{ \
		SYNCHRONIZED; \
		for(unsigned i = 0; i < count; ++i) \
		{ \
			out_samplers[i] = samplers[D3D11_STAGE_##XS][start + i].ref(); \
		} \
	}
475
476#define DO_VS(x) x
477#define DO_GS(x) do {if(caps.gs) {x;}} while(0)
478#define DO_PS(x) x
479#define DO_HS(x)
480#define DO_DS(x)
481#define DO_CS(x)
482	IMPLEMENT_SHADER_STAGE(VS, Vertex)
483	IMPLEMENT_SHADER_STAGE(GS, Geometry)
484	IMPLEMENT_SHADER_STAGE(PS, Pixel)
485
486#if API >= 11
487	IMPLEMENT_SHADER_STAGE(HS, Hull)
488	IMPLEMENT_SHADER_STAGE(DS, Domain)
489	IMPLEMENT_SHADER_STAGE(CS, Compute)
490
491	virtual void STDMETHODCALLTYPE CSSetUnorderedAccessViews(
492		unsigned start,
493		unsigned count,
494		ID3D11UnorderedAccessView *const *new_unordered_access_views,
495		const unsigned *new_uav_initial_counts)
496	{
497		SYNCHRONIZED;
498		for(unsigned i = 0; i < count; ++i)
499			cs_unordered_access_views[start + i] = new_unordered_access_views[i];
500	}
501
502	virtual void STDMETHODCALLTYPE CSGetUnorderedAccessViews(
503		unsigned start,
504		unsigned count,
505		ID3D11UnorderedAccessView **out_unordered_access_views)
506	{
507		SYNCHRONIZED;
508		for(unsigned i = 0; i < count; ++i)
509			out_unordered_access_views[i] = cs_unordered_access_views[start + i].ref();
510	}
511#endif
512
513	template<unsigned s>
514	void update_stage()
515	{
516		if(update_flags & (1 << (UPDATE_VIEWS_SHIFT + s)))
517		{
518			while(num_shader_resource_views[s] && !sampler_views[s][num_shader_resource_views[s] - 1]) \
519				--num_shader_resource_views[s];
520			if((1 << s) & caps.stages_with_sampling)
521			{
522				struct pipe_sampler_view* views_to_bind[PIPE_MAX_SAMPLERS];
523				unsigned num_views_to_bind = shaders[s] ? shaders[s]->slot_to_resource.size() : 0;
524				for(unsigned i = 0; i < num_views_to_bind; ++i)
525				{
526					views_to_bind[i] = sampler_views[s][shaders[s]->slot_to_resource[i]];
527				}
528				switch(s)
529				{
530				case PIPE_SHADER_VERTEX:
531					pipe->set_vertex_sampler_views(pipe, num_views_to_bind, views_to_bind);
532					break;
533				case PIPE_SHADER_FRAGMENT:
534					pipe->set_fragment_sampler_views(pipe, num_views_to_bind, views_to_bind);
535					break;
536				case PIPE_SHADER_GEOMETRY:
537					pipe->set_geometry_sampler_views(pipe, num_views_to_bind, views_to_bind);
538					break;
539				}
540			}
541		}
542
543		if(update_flags & (1 << (UPDATE_SAMPLERS_SHIFT + s)))
544		{
545			while(num_samplers[s] && !sampler_csos[s].v[num_samplers[s] - 1])
546				--num_samplers[s];
547			if((1 << s) & caps.stages_with_sampling)
548			{
549				void* samplers_to_bind[PIPE_MAX_SAMPLERS];
550				unsigned num_samplers_to_bind = shaders[s] ? shaders[s]->slot_to_sampler.size() : 0;
551				for(unsigned i = 0; i < num_samplers_to_bind; ++i)
552				{
553					// index can be -1 to access sampler_csos[s].ld
554					samplers_to_bind[i] = *(sampler_csos[s].v + shaders[s]->slot_to_sampler[i]);
555				}
556				switch(s)
557				{
558				case PIPE_SHADER_VERTEX:
559					pipe->bind_vertex_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
560					break;
561				case PIPE_SHADER_FRAGMENT:
562					pipe->bind_fragment_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
563					break;
564				case PIPE_SHADER_GEOMETRY:
565					pipe->bind_geometry_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
566					break;
567				}
568			}
569		}
570	}
571
572	void update_state()
573	{
574		update_stage<D3D11_STAGE_PS>();
575		update_stage<D3D11_STAGE_VS>();
576		update_stage<D3D11_STAGE_GS>();
577#if API >= 11
578		update_stage<D3D11_STAGE_HS>();
579		update_stage<D3D11_STAGE_DS>();
580		update_stage<D3D11_STAGE_CS>();
581#endif
582
583		if(update_flags & UPDATE_VERTEX_BUFFERS)
584		{
585			while(num_vertex_buffers && !vertex_buffers[num_vertex_buffers - 1].buffer)
586				--num_vertex_buffers;
587			pipe->set_vertex_buffers(pipe, num_vertex_buffers, vertex_buffers);
588		}
589
590		update_flags = 0;
591	}
592
593	virtual void STDMETHODCALLTYPE IASetInputLayout(
594		ID3D11InputLayout *new_input_layout)
595	{
596		SYNCHRONIZED;
597		if(new_input_layout != input_layout.p)
598		{
599			input_layout = new_input_layout;
600			pipe->bind_vertex_elements_state(pipe, new_input_layout ? ((GalliumD3D11InputLayout*)new_input_layout)->object : default_input_layout);
601		}
602	}
603
604	virtual void STDMETHODCALLTYPE IAGetInputLayout(
605		ID3D11InputLayout **out_input_layout)
606	{
607		SYNCHRONIZED;
608		*out_input_layout = input_layout.ref();
609	}
610
611	virtual void STDMETHODCALLTYPE IASetVertexBuffers(
612		unsigned start,
613		unsigned count,
614		ID3D11Buffer *const *new_vertex_buffers,
615		const unsigned *new_strides,
616		const unsigned *new_offsets)
617	{
618		SYNCHRONIZED;
619		int last_different = -1;
620		for(unsigned i = 0; i < count; ++i)
621		{
622			ID3D11Buffer* buffer = new_vertex_buffers[i];
623			if(buffer != input_buffers[start + i].p
624				|| vertex_buffers[start + i].buffer_offset != new_offsets[i]
625				|| vertex_buffers[start + i].stride != new_offsets[i]
626			)
627			{
628				input_buffers[start + i] = buffer;
629				vertex_buffers[start + i].buffer = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
630				vertex_buffers[start + i].buffer_offset = new_offsets[i];
631				vertex_buffers[start + i].stride = new_strides[i];
632				last_different = i;
633			}
634		}
635		if(last_different >= 0)
636		{
637			num_vertex_buffers = std::max(num_vertex_buffers, start + count);
638			update_flags |= UPDATE_VERTEX_BUFFERS;
639		}
640	}
641
642	virtual void STDMETHODCALLTYPE IAGetVertexBuffers(
643		unsigned start,
644		unsigned count,
645		ID3D11Buffer **out_vertex_buffers,
646		unsigned *out_strides,
647		unsigned *out_offsets)
648	{
649		SYNCHRONIZED;
650		if(out_vertex_buffers)
651		{
652			for(unsigned i = 0; i < count; ++i)
653				out_vertex_buffers[i] = input_buffers[start + i].ref();
654		}
655
656		if(out_offsets)
657		{
658			for(unsigned i = 0; i < count; ++i)
659				out_offsets[i] = vertex_buffers[start + i].buffer_offset;
660		}
661
662		if(out_strides)
663		{
664			for(unsigned i = 0; i < count; ++i)
665				out_strides[i] = vertex_buffers[start + i].stride;
666		}
667	}
668
669	void set_index_buffer()
670	{
671		pipe_index_buffer ib;
672		if(!index_buffer)
673		{
674			memset(&ib, 0, sizeof(ib));
675		}
676		else
677		{
678			switch(index_format) {
679			case DXGI_FORMAT_R32_UINT:
680				ib.index_size = 4;
681				strip_cut_index = 0xffffffff;
682				break;
683			case DXGI_FORMAT_R16_UINT:
684				ib.index_size = 2;
685				strip_cut_index = 0xffff;
686				break;
687			default:
688				ib.index_size = 1;
689				strip_cut_index = 0xff;
690				break;
691			}
692			ib.offset = index_offset;
693			ib.buffer = index_buffer ? ((GalliumD3D11Buffer*)index_buffer.p)->resource : 0;
694		}
695		pipe->set_index_buffer(pipe, &ib);
696	}
697
698	virtual void STDMETHODCALLTYPE IASetIndexBuffer(
699		ID3D11Buffer *new_index_buffer,
700		DXGI_FORMAT new_index_format,
701		unsigned new_index_offset)
702	{
703		SYNCHRONIZED;
704		if(index_buffer.p != new_index_buffer || index_format != new_index_format || index_offset != new_index_offset)
705		{
706			index_buffer = new_index_buffer;
707			index_format = new_index_format;
708			index_offset = new_index_offset;
709
710			set_index_buffer();
711		}
712	}
713
714	virtual void STDMETHODCALLTYPE IAGetIndexBuffer(
715		ID3D11Buffer **out_index_buffer,
716		DXGI_FORMAT *out_index_format,
717		unsigned *out_index_offset)
718	{
719		SYNCHRONIZED;
720		if(out_index_buffer)
721			*out_index_buffer = index_buffer.ref();
722		if(out_index_format)
723			*out_index_format = index_format;
724		if(out_index_offset)
725			*out_index_offset = index_offset;
726	}
727
728	virtual void STDMETHODCALLTYPE IASetPrimitiveTopology(
729		D3D11_PRIMITIVE_TOPOLOGY new_primitive_topology)
730	{
731		SYNCHRONIZED;
732		if(primitive_topology != new_primitive_topology)
733		{
734			if(new_primitive_topology < D3D_PRIMITIVE_TOPOLOGY_COUNT)
735				primitive_mode = d3d_to_pipe_prim[new_primitive_topology];
736			else
737				primitive_mode = 0;
738			primitive_topology = new_primitive_topology;
739		}
740	}
741
742	virtual void STDMETHODCALLTYPE IAGetPrimitiveTopology(
743		D3D11_PRIMITIVE_TOPOLOGY *out_primitive_topology)
744	{
745		SYNCHRONIZED;
746		*out_primitive_topology = primitive_topology;
747	}
748
749	virtual void STDMETHODCALLTYPE DrawIndexed(
750		unsigned index_count,
751		unsigned start_index_location,
752		int base_vertex_location)
753	{
754		SYNCHRONIZED;
755		if(update_flags)
756			update_state();
757
758		pipe_draw_info info;
759		info.mode = primitive_mode;
760		info.indexed = TRUE;
761		info.count = index_count;
762		info.start = start_index_location;
763		info.index_bias = base_vertex_location;
764		info.min_index = 0;
765		info.max_index = ~0;
766		info.start_instance = 0;
767		info.instance_count = 1;
768		info.primitive_restart = FALSE;
769
770		pipe->draw_vbo(pipe, &info);
771	}
772
773	virtual void STDMETHODCALLTYPE Draw(
774		unsigned vertex_count,
775		unsigned start_vertex_location)
776	{
777		SYNCHRONIZED;
778		if(update_flags)
779			update_state();
780
781		pipe_draw_info info;
782		info.mode = primitive_mode;
783		info.indexed = FALSE;
784		info.count = vertex_count;
785		info.start = start_vertex_location;
786		info.index_bias = 0;
787		info.min_index = 0;
788		info.max_index = ~0;
789		info.start_instance = 0;
790		info.instance_count = 1;
791		info.primitive_restart = TRUE;
792		info.restart_index = strip_cut_index;
793
794		pipe->draw_vbo(pipe, &info);
795	}
796
797	virtual void STDMETHODCALLTYPE DrawIndexedInstanced(
798		unsigned index_countPerInstance,
799		unsigned instance_count,
800		unsigned start_index_location,
801		int base_vertex_location,
802		unsigned start_instance_location)
803	{
804		SYNCHRONIZED;
805		if(update_flags)
806			update_state();
807
808		pipe_draw_info info;
809		info.mode = primitive_mode;
810		info.indexed = TRUE;
811		info.count = index_countPerInstance;
812		info.start = start_index_location;
813		info.index_bias = base_vertex_location;
814		info.min_index = 0;
815		info.max_index = ~0;
816		info.start_instance = start_instance_location;
817		info.instance_count = instance_count;
818		info.primitive_restart = FALSE;
819
820		pipe->draw_vbo(pipe, &info);
821	}
822
823	virtual void STDMETHODCALLTYPE DrawInstanced(
824		unsigned vertex_countPerInstance,
825		unsigned instance_count,
826		unsigned start_vertex_location,
827		unsigned start_instance_location)
828	{
829		SYNCHRONIZED;
830		if(update_flags)
831			update_state();
832
833		pipe_draw_info info;
834		info.mode = primitive_mode;
835		info.indexed = FALSE;
836		info.count = vertex_countPerInstance;
837		info.start = start_vertex_location;
838		info.index_bias = 0;
839		info.min_index = 0;
840		info.max_index = ~0;
841		info.start_instance = start_instance_location;
842		info.instance_count = instance_count;
843		info.primitive_restart = TRUE;
844		info.restart_index = strip_cut_index;
845
846		pipe->draw_vbo(pipe, &info);
847	}
848
849	virtual void STDMETHODCALLTYPE DrawAuto(void)
850	{
851		if(!caps.so)
852			return;
853
854		SYNCHRONIZED;
855		if(update_flags)
856			update_state();
857
858		pipe->draw_stream_output(pipe, primitive_mode);
859	}
860
861	virtual void STDMETHODCALLTYPE DrawIndexedInstancedIndirect(
862		ID3D11Buffer *buffer,
863		unsigned aligned_byte_offset)
864	{
865		SYNCHRONIZED;
866		if(update_flags)
867			update_state();
868
869		struct {
870			unsigned count;
871			unsigned instance_count;
872			unsigned start;
873			unsigned index_bias;
874		} data;
875
876		pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data);
877
878		pipe_draw_info info;
879		info.mode = primitive_mode;
880		info.indexed = TRUE;
881		info.start = data.start;
882		info.count = data.count;
883		info.index_bias = data.index_bias;
884		info.min_index = 0;
885		info.max_index = ~0;
886		info.start_instance = 0;
887		info.instance_count = data.instance_count;
888		info.primitive_restart = FALSE;
889
890		pipe->draw_vbo(pipe, &info);
891	}
892
893	virtual void STDMETHODCALLTYPE DrawInstancedIndirect(
894		ID3D11Buffer *buffer,
895		unsigned aligned_byte_offset)
896	{
897		SYNCHRONIZED;
898		if(update_flags)
899			update_state();
900
901		struct {
902			unsigned count;
903			unsigned instance_count;
904			unsigned start;
905		} data;
906
907		pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data);
908
909		pipe_draw_info info;
910		info.mode = primitive_mode;
911		info.indexed = FALSE;
912		info.start = data.start;
913		info.count = data.count;
914		info.index_bias = 0;
915		info.min_index = 0;
916		info.max_index = ~0;
917		info.start_instance = 0;
918		info.instance_count = data.instance_count;
919		info.primitive_restart = TRUE;
920		info.restart_index = strip_cut_index;
921
922		pipe->draw_vbo(pipe, &info);
923	}
924
925#if API >= 11
926	virtual void STDMETHODCALLTYPE Dispatch(
927		unsigned thread_group_count_x,
928		unsigned thread_group_count_y,
929		unsigned thread_group_count_z)
930	{
931// uncomment this when this is implemented
932//		SYNCHRONIZED;
933//		if(update_flags)
934//			update_state();
935	}
936
937	virtual void STDMETHODCALLTYPE DispatchIndirect(
938		ID3D11Buffer *buffer,
939		unsigned aligned_byte_offset)
940	{
941// uncomment this when this is implemented
942//		SYNCHRONIZED;
943//		if(update_flags)
944//			update_state();
945	}
946#endif
947
948	void set_clip()
949	{
950		pipe_clip_state clip;
951		clip.nr = 0;
952		clip.depth_clamp = depth_clamp;
953		pipe->set_clip_state(pipe, &clip);
954	}
955
956	virtual void STDMETHODCALLTYPE RSSetState(
957		ID3D11RasterizerState *new_rasterizer_state)
958	{
959		SYNCHRONIZED;
960		if(new_rasterizer_state != rasterizer_state.p)
961		{
962			rasterizer_state = new_rasterizer_state;
963			pipe->bind_rasterizer_state(pipe, new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->object : default_rasterizer);
964			bool new_depth_clamp = new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->depth_clamp : false;
965			if(depth_clamp != new_depth_clamp)
966			{
967				depth_clamp = new_depth_clamp;
968				set_clip();
969			}
970		}
971	}
972
973	virtual void STDMETHODCALLTYPE RSGetState(
974		ID3D11RasterizerState **out_rasterizer_state)
975	{
976		SYNCHRONIZED;
977		*out_rasterizer_state = rasterizer_state.ref();
978	}
979
980	void set_viewport()
981	{
982		// TODO: is depth correct? it seems D3D10/11 uses a [-1,1]x[-1,1]x[0,1] cube
983		pipe_viewport_state viewport;
984		float half_width = viewports[0].Width * 0.5f;
985		float half_height = viewports[0].Height * 0.5f;
986
987		viewport.scale[0] = half_width;
988		viewport.scale[1] = -half_height;
989		viewport.scale[2] = (viewports[0].MaxDepth - viewports[0].MinDepth);
990		viewport.scale[3] = 1.0f;
991		viewport.translate[0] = half_width + viewports[0].TopLeftX;
992		viewport.translate[1] = half_height + viewports[0].TopLeftY;
993		viewport.translate[2] = viewports[0].MinDepth;
994		viewport.translate[3] = 1.0f;
995		pipe->set_viewport_state(pipe, &viewport);
996	}
997
998	virtual void STDMETHODCALLTYPE RSSetViewports(
999		unsigned count,
1000		const D3D11_VIEWPORT *new_viewports)
1001	{
1002		SYNCHRONIZED;
1003		if(count)
1004		{
1005			if(memcmp(&viewports[0], &new_viewports[0], sizeof(viewports[0])))
1006			{
1007				viewports[0] = new_viewports[0];
1008				set_viewport();
1009			}
1010			for(unsigned i = 1; i < count; ++i)
1011				viewports[i] = new_viewports[i];
1012		}
1013		else if(num_viewports)
1014		{
1015			// TODO: what should we do here?
1016			memset(&viewports[0], 0, sizeof(viewports[0]));
1017			set_viewport();
1018		}
1019		num_viewports = count;
1020	}
1021
1022	virtual void STDMETHODCALLTYPE RSGetViewports(
1023		unsigned *out_count,
1024		D3D11_VIEWPORT *out_viewports)
1025	{
1026		SYNCHRONIZED;
1027		if(out_viewports)
1028		{
1029			unsigned i;
1030			for(i = 0; i < std::min(*out_count, num_viewports); ++i)
1031				out_viewports[i] = viewports[i];
1032
1033			memset(out_viewports + i, 0, (*out_count - i) * sizeof(D3D11_VIEWPORT));
1034		}
1035
1036		*out_count = num_viewports;
1037	}
1038
1039	void set_scissor()
1040	{
1041		pipe_scissor_state scissor;
1042		scissor.minx = scissor_rects[0].left;
1043		scissor.miny = scissor_rects[0].top;
1044		scissor.maxx = scissor_rects[0].right;
1045		scissor.maxy = scissor_rects[0].bottom;
1046		pipe->set_scissor_state(pipe, &scissor);
1047	}
1048
1049	virtual void STDMETHODCALLTYPE RSSetScissorRects(
1050		unsigned count,
1051		const D3D11_RECT *new_rects)
1052	{
1053		SYNCHRONIZED;
1054		if(count)
1055		{
1056			if(memcmp(&scissor_rects[0], &new_rects[0], sizeof(scissor_rects[0])))
1057			{
1058				scissor_rects[0] = new_rects[0];
1059				set_scissor();
1060			}
1061			for(unsigned i = 1; i < count; ++i)
1062				scissor_rects[i] = new_rects[i];
1063		}
1064		else if(num_scissor_rects)
1065		{
1066			// TODO: what should we do here?
1067			memset(&scissor_rects[0], 0, sizeof(scissor_rects[0]));
1068			set_scissor();
1069		}
1070
1071		num_scissor_rects = count;
1072	}
1073
1074	virtual void STDMETHODCALLTYPE RSGetScissorRects(
1075		unsigned *out_count,
1076		D3D11_RECT *out_rects)
1077	{
1078		SYNCHRONIZED;
1079		if(out_rects)
1080		{
1081			unsigned i;
1082			for(i = 0; i < std::min(*out_count, num_scissor_rects); ++i)
1083				out_rects[i] = scissor_rects[i];
1084
1085			memset(out_rects + i, 0, (*out_count - i) * sizeof(D3D11_RECT));
1086		}
1087
1088		*out_count = num_scissor_rects;
1089	}
1090
1091	virtual void STDMETHODCALLTYPE OMSetBlendState(
1092		ID3D11BlendState *new_blend_state,
1093		const float new_blend_factor[4],
1094		unsigned new_sample_mask)
1095	{
1096		SYNCHRONIZED;
1097		float white[4] = {1.0f, 1.0f, 1.0f, 1.0f};
1098
1099		if(blend_state.p != new_blend_state)
1100		{
1101			pipe->bind_blend_state(pipe, new_blend_state ? ((GalliumD3D11BlendState*)new_blend_state)->object : default_blend);
1102			blend_state = new_blend_state;
1103		}
1104
1105		// Windows D3D11 does this, even though it's apparently undocumented
1106		if(!new_blend_factor)
1107			new_blend_factor = white;
1108
1109		if(memcmp(blend_color, new_blend_factor, sizeof(blend_color)))
1110		{
1111			pipe->set_blend_color(pipe, (struct pipe_blend_color*)new_blend_factor);
1112			memcpy(blend_color, new_blend_factor, sizeof(blend_color));
1113		}
1114
1115		if(sample_mask != new_sample_mask)
1116		{
1117			pipe->set_sample_mask(pipe, new_sample_mask);
1118			sample_mask = new_sample_mask;
1119		}
1120	}
1121
1122	virtual void STDMETHODCALLTYPE OMGetBlendState(
1123		ID3D11BlendState **out_blend_state,
1124		float out_blend_factor[4],
1125		unsigned *out_sample_mask)
1126	{
1127		SYNCHRONIZED;
1128		if(out_blend_state)
1129			*out_blend_state = blend_state.ref();
1130		if(out_blend_factor)
1131			memcpy(out_blend_factor, blend_color, sizeof(blend_color));
1132		if(out_sample_mask)
1133			*out_sample_mask = sample_mask;
1134	}
1135
1136	void set_stencil_ref()
1137	{
1138		struct pipe_stencil_ref sref;
1139		sref.ref_value[0] = stencil_ref;
1140		sref.ref_value[1] = stencil_ref;
1141		pipe->set_stencil_ref(pipe, &sref);
1142	}
1143
1144	virtual void STDMETHODCALLTYPE OMSetDepthStencilState(
1145		ID3D11DepthStencilState *new_depth_stencil_state,
1146		unsigned new_stencil_ref)
1147	{
1148		SYNCHRONIZED;
1149		if(new_depth_stencil_state != depth_stencil_state.p)
1150		{
1151			pipe->bind_depth_stencil_alpha_state(pipe, new_depth_stencil_state ? ((GalliumD3D11DepthStencilState*)new_depth_stencil_state)->object : default_depth_stencil);
1152			depth_stencil_state = new_depth_stencil_state;
1153		}
1154
1155		if(new_stencil_ref != stencil_ref)
1156		{
1157			stencil_ref = new_stencil_ref;
1158			set_stencil_ref();
1159		}
1160	}
1161
1162	virtual void STDMETHODCALLTYPE OMGetDepthStencilState(
1163		ID3D11DepthStencilState **out_depth_stencil_state,
1164		unsigned *out_stencil_ref)
1165	{
1166		SYNCHRONIZED;
1167		if(*out_depth_stencil_state)
1168			*out_depth_stencil_state = depth_stencil_state.ref();
1169		if(out_stencil_ref)
1170			*out_stencil_ref = stencil_ref;
1171	}
1172
1173	void set_framebuffer()
1174	{
1175		struct pipe_framebuffer_state fb;
1176		memset(&fb, 0, sizeof(fb));
1177		if(depth_stencil_view)
1178		{
1179			struct pipe_surface* surf = ((GalliumD3D11DepthStencilView*)depth_stencil_view.p)->object;
1180			fb.zsbuf = surf;
1181			if(surf->width > fb.width)
1182				fb.width = surf->width;
1183			if(surf->height > fb.height)
1184				fb.height = surf->height;
1185		}
1186		fb.nr_cbufs = num_render_target_views;
1187		unsigned i;
1188		for(i = 0; i < num_render_target_views; ++i)
1189		{
1190			if(render_target_views[i])
1191			{
1192				struct pipe_surface* surf = ((GalliumD3D11RenderTargetView*)render_target_views[i].p)->object;
1193				fb.cbufs[i] = surf;
1194				if(surf->width > fb.width)
1195					fb.width = surf->width;
1196				if(surf->height > fb.height)
1197					fb.height = surf->height;
1198			}
1199		}
1200
1201		pipe->set_framebuffer_state(pipe, &fb);
1202	}
1203
1204	/* TODO: the docs say that we should unbind conflicting resources (e.g. those bound for read while we are binding them for write too), but we aren't.
1205	 * Hopefully nobody relies on this happening
1206	 */
1207
1208	virtual void STDMETHODCALLTYPE OMSetRenderTargets(
1209		unsigned count,
1210		ID3D11RenderTargetView *const *new_render_target_views,
1211		ID3D11DepthStencilView  *new_depth_stencil_view)
1212	{
1213		SYNCHRONIZED;
1214		if(!new_render_target_views)
1215			count = 0;
1216		if(count == num_render_target_views)
1217		{
1218			for(unsigned i = 0; i < count; ++i)
1219			{
1220				if(new_render_target_views[i] != render_target_views[i].p)
1221					goto changed;
1222			}
1223			return;
1224		}
1225changed:
1226		depth_stencil_view = new_depth_stencil_view;
1227		unsigned i;
1228		for(i = 0; i < count; ++i)
1229		{
1230			render_target_views[i] = new_render_target_views[i];
1231#if API >= 11
1232			om_unordered_access_views[i] = (ID3D11UnorderedAccessView*)NULL;
1233#endif
1234		}
1235		for(; i < num_render_target_views; ++i)
1236			render_target_views[i] = (ID3D11RenderTargetView*)NULL;
1237		num_render_target_views = count;
1238		set_framebuffer();
1239	}
1240
1241	virtual void STDMETHODCALLTYPE OMGetRenderTargets(
1242		unsigned count,
1243		ID3D11RenderTargetView **out_render_target_views,
1244		ID3D11DepthStencilView  **out_depth_stencil_view)
1245	{
1246		SYNCHRONIZED;
1247		if(out_render_target_views)
1248		{
1249			unsigned i;
1250			for(i = 0; i < std::min(num_render_target_views, count); ++i)
1251				out_render_target_views[i] = render_target_views[i].ref();
1252
1253			for(; i < count; ++i)
1254				out_render_target_views[i] = 0;
1255		}
1256
1257		if(out_depth_stencil_view)
1258			*out_depth_stencil_view = depth_stencil_view.ref();
1259	}
1260
1261#if API >= 11
1262	/* TODO: what is this supposed to do _exactly_? are we doing the right thing? */
1263	virtual void STDMETHODCALLTYPE OMSetRenderTargetsAndUnorderedAccessViews(
1264		unsigned rtv_count,
1265		ID3D11RenderTargetView *const *new_render_target_views,
1266		ID3D11DepthStencilView  *new_depth_stencil_view,
1267		unsigned uav_start,
1268		unsigned uav_count,
1269		ID3D11UnorderedAccessView *const *new_unordered_access_views,
1270		const unsigned *new_uav_initial_counts)
1271	{
1272		SYNCHRONIZED;
1273		if(rtv_count != D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL)
1274			OMSetRenderTargets(rtv_count, new_render_target_views, new_depth_stencil_view);
1275
1276		if(uav_count != D3D11_KEEP_UNORDERED_ACCESS_VIEWS)
1277		{
1278			for(unsigned i = 0; i < uav_count; ++i)
1279			{
1280				om_unordered_access_views[uav_start + i] = new_unordered_access_views[i];
1281				render_target_views[uav_start + i] = (ID3D11RenderTargetView*)0;
1282			}
1283		}
1284	}
1285
1286	virtual void STDMETHODCALLTYPE OMGetRenderTargetsAndUnorderedAccessViews(
1287		unsigned rtv_count,
1288		ID3D11RenderTargetView **out_render_target_views,
1289		ID3D11DepthStencilView  **out_depth_stencil_view,
1290		unsigned uav_start,
1291		unsigned uav_count,
1292		ID3D11UnorderedAccessView **out_unordered_access_views)
1293	{
1294		SYNCHRONIZED;
1295		if(out_render_target_views)
1296			OMGetRenderTargets(rtv_count, out_render_target_views, out_depth_stencil_view);
1297
1298		if(out_unordered_access_views)
1299		{
1300			for(unsigned i = 0; i < uav_count; ++i)
1301				out_unordered_access_views[i] = om_unordered_access_views[uav_start + i].ref();
1302		}
1303	}
1304#endif
1305
1306	virtual void STDMETHODCALLTYPE SOSetTargets(
1307		unsigned count,
1308		ID3D11Buffer *const *new_so_targets,
1309		const unsigned *new_offsets)
1310	{
1311		SYNCHRONIZED;
1312		unsigned i;
1313		if(!new_so_targets)
1314			count = 0;
1315		bool changed = false;
1316		for(i = 0; i < count; ++i)
1317		{
1318			ID3D11Buffer* buffer = new_so_targets[i];
1319			if(buffer != so_targets[i].p || new_offsets[i] != so_offsets[i])
1320			{
1321				so_buffers[i] = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
1322				so_targets[i] = buffer;
1323				so_offsets[i] = new_offsets[i];
1324				changed = true;
1325			}
1326		}
1327		for(; i < D3D11_SO_BUFFER_SLOT_COUNT; ++i)
1328		{
1329			if(so_targets[i].p || so_offsets[i])
1330			{
1331				changed = true;
1332				so_targets[i] = (ID3D11Buffer*)0;
1333				so_offsets[i] = 0;
1334			}
1335		}
1336		num_so_targets = count;
1337
1338		if(changed && caps.so)
1339			pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1340	}
1341
1342	virtual void STDMETHODCALLTYPE SOGetTargets(
1343		unsigned count,
1344		ID3D11Buffer **out_so_targets
1345#if API < 11
1346		, UINT *out_offsets
1347#endif
1348		)
1349	{
1350		SYNCHRONIZED;
1351		for(unsigned i = 0; i < count; ++i)
1352		{
1353			out_so_targets[i] = so_targets[i].ref();
1354#if API < 11
1355			out_offsets[i] = so_offsets[i];
1356#endif
1357		}
1358	}
1359
1360	virtual void STDMETHODCALLTYPE Begin(
1361		ID3D11Asynchronous *async)
1362	{
1363		SYNCHRONIZED;
1364		if(caps.queries)
1365			pipe->begin_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query);
1366	}
1367
1368	virtual void STDMETHODCALLTYPE End(
1369		ID3D11Asynchronous *async)
1370	{
1371		SYNCHRONIZED;
1372		if(caps.queries)
1373			pipe->end_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query);
1374	}
1375
1376	virtual HRESULT STDMETHODCALLTYPE GetData(
1377		ID3D11Asynchronous *iasync,
1378		void *out_data,
1379		unsigned data_size,
1380		unsigned get_data_flags)
1381	{
1382		SYNCHRONIZED;
1383		if(!caps.queries)
1384			return E_NOTIMPL;
1385
1386		GalliumD3D11Asynchronous<>* async = (GalliumD3D11Asynchronous<>*)iasync;
1387		void* tmp_data = alloca(async->data_size);
1388		boolean ret = pipe->get_query_result(pipe, async->query, !(get_data_flags & D3D11_ASYNC_GETDATA_DONOTFLUSH), tmp_data);
1389		if(out_data)
1390			memcpy(out_data, tmp_data, std::min(async->data_size, data_size));
1391		return ret ? S_OK : S_FALSE;
1392	}
1393
1394	void set_render_condition()
1395	{
1396		if(caps.render_condition)
1397		{
1398			if(!render_predicate)
1399				pipe->render_condition(pipe, 0, 0);
1400			else
1401			{
1402				GalliumD3D11Predicate* predicate = (GalliumD3D11Predicate*)render_predicate.p;
1403				if(!render_predicate_value && predicate->desc.Query == D3D11_QUERY_OCCLUSION_PREDICATE)
1404				{
1405					unsigned mode = (predicate->desc.MiscFlags & D3D11_QUERY_MISC_PREDICATEHINT) ? PIPE_RENDER_COND_NO_WAIT : PIPE_RENDER_COND_WAIT;
1406					pipe->render_condition(pipe, predicate->query, mode);
1407				}
1408				else
1409				{
1410					/* TODO: add inverted predication to Gallium*/
1411					pipe->render_condition(pipe, 0, 0);
1412				}
1413			}
1414		}
1415	}
1416
1417	virtual void STDMETHODCALLTYPE SetPredication(
1418		ID3D11Predicate *new_predicate,
1419		BOOL new_predicate_value)
1420	{
1421		SYNCHRONIZED;
1422		if(render_predicate.p != new_predicate || render_predicate_value != new_predicate_value)
1423		{
1424			render_predicate = new_predicate;
1425			render_predicate_value = new_predicate_value;
1426			set_render_condition();
1427		}
1428	}
1429
1430	virtual void STDMETHODCALLTYPE GetPredication(
1431		ID3D11Predicate **out_predicate,
1432		BOOL *out_predicate_value)
1433	{
1434		SYNCHRONIZED;
1435		if(out_predicate)
1436			*out_predicate = render_predicate.ref();
1437		if(out_predicate_value)
1438			*out_predicate_value = render_predicate_value;
1439	}
1440
1441	static unsigned d3d11_subresource_to_level(struct pipe_resource* resource, unsigned subresource)
1442	{
1443		if(subresource <= resource->last_level)
1444		{
1445			return subresource;
1446		}
1447		else
1448		{
1449			unsigned levels = resource->last_level + 1;
1450			return subresource % levels;
1451		}
1452	}
1453
1454	static unsigned d3d11_subresource_to_face(struct pipe_resource* resource, unsigned subresource)
1455	{
1456		if(subresource <= resource->last_level)
1457		{
1458			return 0;
1459		}
1460		else
1461		{
1462			unsigned levels = resource->last_level + 1;
1463			return subresource / levels;
1464		}
1465	}
1466
1467
1468	/* TODO: deferred contexts will need a different implementation of this,
1469	 * because we can't put the transfer info into the resource itself.
1470	 * Also, there are very different restrictions, for obvious reasons.
1471	 */
1472	virtual HRESULT STDMETHODCALLTYPE Map(
1473		ID3D11Resource *iresource,
1474		unsigned subresource,
1475		D3D11_MAP map_type,
1476		unsigned map_flags,
1477		D3D11_MAPPED_SUBRESOURCE *mapped_resource)
1478	{
1479		SYNCHRONIZED;
1480		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1481		if(resource->transfers.count(subresource))
1482			return E_FAIL;
1483		unsigned level = d3d11_subresource_to_level(resource->resource, subresource);
1484		unsigned face = d3d11_subresource_to_face(resource->resource, subresource);
1485		pipe_box box = d3d11_to_pipe_box(resource->resource, level, 0);
1486		/* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */
1487		unsigned usage = 0;
1488		if(map_type == D3D11_MAP_READ)
1489			usage = PIPE_TRANSFER_READ;
1490		else if(map_type == D3D11_MAP_WRITE)
1491			usage = PIPE_TRANSFER_WRITE;
1492		else if(map_type == D3D11_MAP_READ_WRITE)
1493			usage = PIPE_TRANSFER_READ_WRITE;
1494		else if(map_type == D3D11_MAP_WRITE_DISCARD)
1495			usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD;
1496		else if(map_type == D3D11_MAP_WRITE_NO_OVERWRITE)
1497			usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_NOOVERWRITE;
1498		else
1499			return E_INVALIDARG;
1500		if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT)
1501			usage |= PIPE_TRANSFER_DONTBLOCK;
1502		struct pipe_transfer* transfer = pipe->get_transfer(pipe, resource->resource, level, usage, &box);
1503		if(!transfer) {
1504			if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT)
1505				return DXGI_ERROR_WAS_STILL_DRAWING;
1506			else
1507				return E_FAIL;
1508		}
1509		resource->transfers[subresource] = transfer;
1510		mapped_resource->pData = pipe->transfer_map(pipe, transfer);
1511		mapped_resource->RowPitch = transfer->stride;
1512		mapped_resource->DepthPitch = transfer->layer_stride;
1513		return S_OK;
1514	}
1515
1516	virtual void STDMETHODCALLTYPE Unmap(
1517		ID3D11Resource *iresource,
1518		unsigned subresource)
1519	{
1520		SYNCHRONIZED;
1521		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1522		std::unordered_map<unsigned, pipe_transfer*>::iterator i = resource->transfers.find(subresource);
1523		if(i != resource->transfers.end())
1524		{
1525			pipe->transfer_unmap(pipe, i->second);
1526			pipe->transfer_destroy(pipe, i->second);
1527			resource->transfers.erase(i);
1528		}
1529	}
1530
1531	virtual void STDMETHODCALLTYPE CopySubresourceRegion(
1532		ID3D11Resource *dst_resource,
1533		unsigned dst_subresource,
1534		unsigned dst_x,
1535		unsigned dst_y,
1536		unsigned dst_z,
1537		ID3D11Resource *src_resource,
1538		unsigned src_subresource,
1539		const D3D11_BOX *src_box)
1540	{
1541		SYNCHRONIZED;
1542		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1543		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1544		unsigned dst_level = d3d11_subresource_to_level(dst->resource, dst_subresource);
1545		unsigned dst_face = d3d11_subresource_to_face(dst->resource, dst_subresource);
1546		unsigned src_level = d3d11_subresource_to_level(src->resource, src_subresource);
1547		unsigned src_face = d3d11_subresource_to_face(src->resource, src_subresource);
1548		/* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */
1549		pipe_box box = d3d11_to_pipe_box(src->resource, src_level, src_box);
1550		{
1551			pipe->resource_copy_region(pipe,
1552				dst->resource, dst_level, dst_x, dst_y, dst_z,
1553				src->resource, src_level, &box);
1554		}
1555	}
1556
1557	virtual void STDMETHODCALLTYPE CopyResource(
1558		ID3D11Resource *dst_resource,
1559		ID3D11Resource *src_resource)
1560	{
1561		SYNCHRONIZED;
1562		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1563		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1564		unsigned level;
1565		for(level = 0; level <= dst->resource->last_level; ++level)
1566		{
1567		        unsigned layers = 1;
1568			pipe_box box;
1569			if (dst->resource->target == PIPE_TEXTURE_CUBE)
1570				layers = 6;
1571			else if (dst->resource->target == PIPE_TEXTURE_3D)
1572				layers = u_minify(dst->resource->depth0, level);
1573			/* else layers = dst->resource->array_size; */
1574			box.x = box.y = box.z = 0;
1575			box.width = u_minify(dst->resource->width0, level);
1576			box.height = u_minify(dst->resource->height0, level);
1577			box.depth = layers;
1578			pipe->resource_copy_region(pipe,
1579						   dst->resource, level, 0, 0, 0,
1580						   src->resource, level, &box);
1581		}
1582	}
1583
1584	virtual void STDMETHODCALLTYPE UpdateSubresource(
1585		ID3D11Resource *dst_resource,
1586		unsigned dst_subresource,
1587		const D3D11_BOX *pDstBox,
1588		const void *pSrcData,
1589		unsigned src_row_pitch,
1590		unsigned src_depth_pitch)
1591	{
1592		SYNCHRONIZED;
1593		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1594		unsigned dst_level = d3d11_subresource_to_level(dst->resource, dst_subresource);
1595		/* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */
1596		pipe_box box = d3d11_to_pipe_box(dst->resource, dst_level, pDstBox);
1597		pipe->transfer_inline_write(pipe, dst->resource, dst_level, PIPE_TRANSFER_WRITE, &box, pSrcData, src_row_pitch, src_depth_pitch);
1598	}
1599
1600#if API >= 11
1601	virtual void STDMETHODCALLTYPE CopyStructureCount(
1602		ID3D11Buffer *dst_buffer,
1603		unsigned dst_aligned_byte_offset,
1604		ID3D11UnorderedAccessView *src_view)
1605	{
1606		SYNCHRONIZED;
1607	}
1608#endif
1609
1610	virtual void STDMETHODCALLTYPE ClearRenderTargetView(
1611		ID3D11RenderTargetView *render_target_view,
1612		const float color[4])
1613	{
1614		SYNCHRONIZED;
1615		GalliumD3D11RenderTargetView* view = ((GalliumD3D11RenderTargetView*)render_target_view);
1616		union pipe_color_union cc;
1617		cc.f[0] = color[0];
1618		cc.f[1] = color[1];
1619		cc.f[2] = color[2];
1620		cc.f[3] = color[3];
1621		pipe->clear_render_target(pipe, view->object, &cc, 0, 0, view->object->width, view->object->height);
1622	}
1623
1624	virtual void STDMETHODCALLTYPE ClearDepthStencilView(
1625		ID3D11DepthStencilView  *depth_stencil_view,
1626		unsigned clear_flags,
1627		float depth,
1628		UINT8 stencil)
1629	{
1630		SYNCHRONIZED;
1631		GalliumD3D11DepthStencilView* view = ((GalliumD3D11DepthStencilView*)depth_stencil_view);
1632		unsigned flags = 0;
1633		if(clear_flags & D3D11_CLEAR_DEPTH)
1634			flags |= PIPE_CLEAR_DEPTH;
1635		if(clear_flags & D3D11_CLEAR_STENCIL)
1636			flags |= PIPE_CLEAR_STENCIL;
1637		pipe->clear_depth_stencil(pipe, view->object, flags, depth, stencil, 0, 0, view->object->width, view->object->height);
1638	}
1639
1640#if API >= 11
1641	virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewUint(
1642		ID3D11UnorderedAccessView *unordered_access_view,
1643		const unsigned values[4])
1644	{
1645		SYNCHRONIZED;
1646	}
1647
1648	virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewFloat(
1649			ID3D11UnorderedAccessView *unordered_access_view,
1650			const float values[4])
1651	{
1652		SYNCHRONIZED;
1653	}
1654#endif
1655
1656	void restore_gallium_state_blit_only()
1657	{
1658		pipe->bind_blend_state(pipe, blend_state.p ? blend_state.p->object : default_blend);
1659		pipe->bind_depth_stencil_alpha_state(pipe, depth_stencil_state.p ? depth_stencil_state.p->object : default_depth_stencil);
1660		pipe->bind_rasterizer_state(pipe, rasterizer_state.p ? rasterizer_state.p->object : default_rasterizer);
1661		pipe->bind_vertex_elements_state(pipe, input_layout.p ? input_layout.p->object : default_input_layout);
1662		pipe->bind_fs_state(pipe, shaders[D3D11_STAGE_PS].p ? shaders[D3D11_STAGE_PS].p->object : default_shaders[PIPE_SHADER_FRAGMENT]);
1663		pipe->bind_vs_state(pipe, shaders[D3D11_STAGE_VS].p ? shaders[D3D11_STAGE_VS].p->object : default_shaders[PIPE_SHADER_VERTEX]);
1664		if(caps.gs)
1665			pipe->bind_gs_state(pipe, shaders[D3D11_STAGE_GS].p ? shaders[D3D11_STAGE_GS].p->object : default_shaders[PIPE_SHADER_GEOMETRY]);
1666		set_framebuffer();
1667		set_viewport();
1668		set_clip();
1669		set_render_condition();
1670		// TODO: restore stream output
1671
1672		update_flags |= UPDATE_VERTEX_BUFFERS | (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_PS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_PS));
1673	}
1674
1675	virtual void STDMETHODCALLTYPE RestoreGalliumStateBlitOnly()
1676	{
1677		SYNCHRONIZED;
1678		restore_gallium_state_blit_only();
1679	}
1680
1681	virtual void STDMETHODCALLTYPE GenerateMips(
1682		ID3D11ShaderResourceView *shader_resource_view)
1683	{
1684		SYNCHRONIZED;
1685
1686		GalliumD3D11ShaderResourceView* view = (GalliumD3D11ShaderResourceView*)shader_resource_view;
1687		if(caps.gs)
1688			pipe->bind_gs_state(pipe, 0);
1689		if(caps.so)
1690			pipe->bind_stream_output_state(pipe, 0);
1691		if(pipe->render_condition)
1692			pipe->render_condition(pipe, 0, 0);
1693		util_gen_mipmap(gen_mipmap, view->object, 0, 0, view->object->texture->last_level, PIPE_TEX_FILTER_LINEAR);
1694		restore_gallium_state_blit_only();
1695	}
1696
1697	virtual void STDMETHODCALLTYPE RestoreGalliumState()
1698	{
1699		SYNCHRONIZED;
1700		restore_gallium_state_blit_only();
1701
1702		set_index_buffer();
1703		set_stencil_ref();
1704		pipe->set_blend_color(pipe, (struct pipe_blend_color*)blend_color);
1705		pipe->set_sample_mask(pipe, sample_mask);
1706
1707		for(unsigned s = 0; s < 3; ++s)
1708		{
1709			unsigned num = std::min(caps.constant_buffers[s], (unsigned)D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
1710			for(unsigned i = 0; i < num; ++i)
1711				pipe->set_constant_buffer(pipe, s, i, constant_buffers[s][i].p ? constant_buffers[s][i].p->resource : 0);
1712		}
1713
1714		if(caps.so)
1715			pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1716
1717		update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_VS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_VS));
1718		update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_GS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_GS));
1719
1720		set_scissor();
1721	}
1722
1723#if API >= 11
1724	/* TODO: hack SRVs or sampler states to handle this, or add to Gallium */
1725	virtual void STDMETHODCALLTYPE SetResourceMinLOD(
1726		ID3D11Resource *iresource,
1727		float min_lod)
1728	{
1729		SYNCHRONIZED;
1730		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1731		if(resource->min_lod != min_lod)
1732		{
1733			// TODO: actually do anything?
1734			resource->min_lod = min_lod;
1735		}
1736	}
1737
1738	virtual float STDMETHODCALLTYPE GetResourceMinLOD(
1739		ID3D11Resource *iresource)
1740	{
1741		SYNCHRONIZED;
1742		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1743		return resource->min_lod;
1744	}
1745#endif
1746
1747	virtual void STDMETHODCALLTYPE ResolveSubresource(
1748		ID3D11Resource *dst_resource,
1749		unsigned dst_subresource,
1750		ID3D11Resource *src_resource,
1751		unsigned src_subresource,
1752		DXGI_FORMAT format)
1753	{
1754		SYNCHRONIZED;
1755		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1756		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1757		struct pipe_resolve_info info;
1758
1759		info.dst.res = dst->resource;
1760		info.src.res = src->resource;
1761		info.dst.level = 0;
1762		info.dst.layer = d3d11_subresource_to_face(dst->resource, dst_subresource);
1763		info.src.layer = d3d11_subresource_to_face(src->resource, src_subresource);
1764
1765		info.src.x0 = 0;
1766		info.src.x1 = info.src.res->width0;
1767		info.src.y0 = 0;
1768		info.src.y1 = info.src.res->height0;
1769		info.dst.x0 = 0;
1770		info.dst.x1 = info.dst.res->width0;
1771		info.dst.y0 = 0;
1772		info.dst.y1 = info.dst.res->height0;
1773
1774		info.mask = PIPE_MASK_RGBA | PIPE_MASK_ZS;
1775
1776		pipe->resource_resolve(pipe, &info);
1777	}
1778
1779#if API >= 11
1780	virtual void STDMETHODCALLTYPE ExecuteCommandList(
1781		ID3D11CommandList *command_list,
1782		BOOL restore_context_state)
1783	{
1784		SYNCHRONIZED;
1785	}
1786
1787	virtual HRESULT STDMETHODCALLTYPE FinishCommandList(
1788		BOOL restore_deferred_context_state,
1789		ID3D11CommandList **out_command_list)
1790	{
1791		SYNCHRONIZED;
1792		return E_NOTIMPL;
1793	}
1794#endif
1795
1796	virtual void STDMETHODCALLTYPE ClearState(void)
1797	{
1798		/* we don't take a lock here because we would deadlock otherwise
1799		 * TODO: this is probably incorrect, because ClearState should likely be atomic.
1800		 * However, I can't think of any correct usage that would be affected by this
1801		 * being non-atomic, and making this atomic is quite expensive and complicates
1802		 * the code
1803		 */
1804
1805		// we qualify all calls so that we avoid virtual dispatch and might get them inlined
1806		// TODO: make sure all this gets inlined, which might require more compiler flags
1807		// TODO: optimize this
1808#if API >= 11
1809		GalliumD3D11DeviceContext::PSSetShader(0, 0, 0);
1810		GalliumD3D11DeviceContext::GSSetShader(0, 0, 0);
1811		GalliumD3D11DeviceContext::VSSetShader(0, 0, 0);
1812		GalliumD3D11DeviceContext::HSSetShader(0, 0, 0);
1813		GalliumD3D11DeviceContext::DSSetShader(0, 0, 0);
1814		GalliumD3D11DeviceContext::CSSetShader(0, 0, 0);
1815#else
1816		GalliumD3D11DeviceContext::PSSetShader(0);
1817		GalliumD3D11DeviceContext::GSSetShader(0);
1818		GalliumD3D11DeviceContext::VSSetShader(0);
1819#endif
1820
1821		GalliumD3D11DeviceContext::IASetInputLayout(0);
1822		GalliumD3D11DeviceContext::IASetIndexBuffer(0, DXGI_FORMAT_UNKNOWN, 0);
1823		GalliumD3D11DeviceContext::RSSetState(0);
1824		GalliumD3D11DeviceContext::OMSetDepthStencilState(0, 0);
1825		GalliumD3D11DeviceContext::OMSetBlendState(0, (float*)zero_data, ~0);
1826		GalliumD3D11DeviceContext::SetPredication(0, 0);
1827		GalliumD3D11DeviceContext::IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_UNDEFINED);
1828
1829		GalliumD3D11DeviceContext::PSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1830		GalliumD3D11DeviceContext::GSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1831		GalliumD3D11DeviceContext::VSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1832#if API >= 11
1833		GalliumD3D11DeviceContext::HSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1834		GalliumD3D11DeviceContext::DSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1835		GalliumD3D11DeviceContext::CSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1836#endif
1837
1838		GalliumD3D11DeviceContext::IASetVertexBuffers(0, num_vertex_buffers, (ID3D11Buffer**)zero_data, (unsigned*)zero_data, (unsigned*)zero_data);
1839#if API >= 11
1840		GalliumD3D11DeviceContext::OMSetRenderTargetsAndUnorderedAccessViews(0, 0, 0 , 0, 0, 0, 0);
1841#else
1842		GalliumD3D11DeviceContext::OMSetRenderTargets(0, 0, 0 );
1843#endif
1844		GalliumD3D11DeviceContext::SOSetTargets(0, 0, 0);
1845
1846		GalliumD3D11DeviceContext::PSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11ShaderResourceView**)zero_data);
1847		GalliumD3D11DeviceContext::GSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11ShaderResourceView**)zero_data);
1848		GalliumD3D11DeviceContext::VSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11ShaderResourceView**)zero_data);
1849#if API >= 11
1850		GalliumD3D11DeviceContext::HSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11ShaderResourceView**)zero_data);
1851		GalliumD3D11DeviceContext::DSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11ShaderResourceView**)zero_data);
1852		GalliumD3D11DeviceContext::CSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11ShaderResourceView**)zero_data);
1853#endif
1854
1855		GalliumD3D11DeviceContext::PSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11SamplerState**)zero_data);
1856		GalliumD3D11DeviceContext::GSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11SamplerState**)zero_data);
1857		GalliumD3D11DeviceContext::VSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11SamplerState**)zero_data);
1858#if API >= 11
1859		GalliumD3D11DeviceContext::HSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11SamplerState**)zero_data);
1860		GalliumD3D11DeviceContext::DSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11SamplerState**)zero_data);
1861		GalliumD3D11DeviceContext::CSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11SamplerState**)zero_data);
1862#endif
1863
1864		GalliumD3D11DeviceContext::RSSetViewports(0, 0);
1865		GalliumD3D11DeviceContext::RSSetScissorRects(0, 0);
1866	}
1867
1868	virtual void STDMETHODCALLTYPE Flush(void)
1869	{
1870		SYNCHRONIZED;
1871                pipe->flush(pipe, 0);
1872	}
1873
1874	/* In Direct3D 10, if the reference count of an object drops to 0, it is automatically
1875	 * cleanly unbound from the pipeline.
1876	 * In Direct3D 11, the pipeline holds a reference.
1877	 *
1878	 * Note that instead of always scanning the pipeline on destruction, we could
1879	 * maintain the internal reference count on DirectX 10 and use it to check if an
1880	 * object is still bound.
1881	 * Presumably, on average, scanning is faster if the application is well written.
1882	 */
1883#if API < 11
1884#define IMPLEMENT_SIMPLE_UNBIND(name, member, gallium, def) \
1885	void Unbind##name(ID3D11##name* state) \
1886	{ \
1887		SYNCHRONIZED; \
1888		if((void*)state == (void*)member.p) \
1889		{ \
1890			member.p = 0; \
1891			pipe->bind_##gallium##_state(pipe, default_##def); \
1892		} \
1893	}
1894	IMPLEMENT_SIMPLE_UNBIND(BlendState, blend_state, blend, blend)
1895	IMPLEMENT_SIMPLE_UNBIND(RasterizerState, rasterizer_state, rasterizer, rasterizer)
1896	IMPLEMENT_SIMPLE_UNBIND(DepthStencilState, depth_stencil_state, depth_stencil_alpha, depth_stencil)
1897	IMPLEMENT_SIMPLE_UNBIND(InputLayout, input_layout, vertex_elements, input_layout)
1898	IMPLEMENT_SIMPLE_UNBIND(PixelShader, shaders[D3D11_STAGE_PS], fs, shaders[D3D11_STAGE_PS])
1899	IMPLEMENT_SIMPLE_UNBIND(VertexShader, shaders[D3D11_STAGE_VS], vs, shaders[D3D11_STAGE_VS])
1900	IMPLEMENT_SIMPLE_UNBIND(GeometryShader, shaders[D3D11_STAGE_GS], gs, shaders[D3D11_STAGE_GS])
1901
1902	void UnbindPredicate(ID3D11Predicate* predicate)
1903	{
1904		SYNCHRONIZED;
1905		if(predicate == render_predicate)
1906		{
1907			render_predicate.p = NULL;
1908			render_predicate_value = 0;
1909			pipe->render_condition(pipe, 0, 0);
1910		}
1911	}
1912
1913	void UnbindSamplerState(ID3D11SamplerState* state)
1914	{
1915		SYNCHRONIZED;
1916		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1917		{
1918			for(unsigned i = 0; i < num_samplers[s]; ++i)
1919			{
1920				if(samplers[s][i] == state)
1921				{
1922					samplers[s][i].p = NULL;
1923					sampler_csos[s].v[i] = NULL;
1924					update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s));
1925				}
1926			}
1927		}
1928	}
1929
1930	void UnbindBuffer(ID3D11Buffer* buffer)
1931	{
1932		SYNCHRONIZED;
1933		if(buffer == index_buffer)
1934		{
1935			index_buffer.p = 0;
1936			index_format = DXGI_FORMAT_UNKNOWN;
1937			index_offset = 0;
1938			struct pipe_index_buffer ib;
1939			memset(&ib, 0, sizeof(ib));
1940			pipe->set_index_buffer(pipe, &ib);
1941		}
1942
1943		for(unsigned i = 0; i < num_vertex_buffers; ++i)
1944		{
1945			if(buffer == input_buffers[i])
1946			{
1947				input_buffers[i].p = 0;
1948				memset(&vertex_buffers[num_vertex_buffers], 0, sizeof(vertex_buffers[num_vertex_buffers]));
1949				update_flags |= UPDATE_VERTEX_BUFFERS;
1950			}
1951		}
1952
1953		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1954		{
1955			for(unsigned i = 0; i < sizeof(constant_buffers) / sizeof(constant_buffers[0]); ++i)
1956			{
1957				if(constant_buffers[s][i] == buffer)
1958				{
1959					constant_buffers[s][i] = (ID3D10Buffer*)NULL;
1960					pipe->set_constant_buffer(pipe, s, i, NULL);
1961				}
1962			}
1963		}
1964	}
1965
1966	void UnbindDepthStencilView(ID3D11DepthStencilView * view)
1967	{
1968		SYNCHRONIZED;
1969		if(view == depth_stencil_view)
1970		{
1971			depth_stencil_view.p = NULL;
1972			set_framebuffer();
1973		}
1974	}
1975
1976	void UnbindRenderTargetView(ID3D11RenderTargetView* view)
1977	{
1978		SYNCHRONIZED;
1979		bool any_bound = false;
1980		for(unsigned i = 0; i < num_render_target_views; ++i)
1981		{
1982			if(render_target_views[i] == view)
1983			{
1984				render_target_views[i].p = NULL;
1985				any_bound = true;
1986			}
1987		}
1988		if(any_bound)
1989			set_framebuffer();
1990	}
1991
1992	void UnbindShaderResourceView(ID3D11ShaderResourceView* view)
1993	{
1994		SYNCHRONIZED;
1995		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1996		{
1997			for(unsigned i = 0; i < num_shader_resource_views[s]; ++i)
1998			{
1999				if(shader_resource_views[s][i] == view)
2000				{
2001					shader_resource_views[s][i].p = NULL;
2002					sampler_views[s][i] = NULL;
2003					update_flags |= (1 << (UPDATE_VIEWS_SHIFT + s));
2004				}
2005			}
2006		}
2007	}
2008#endif
2009
2010#undef SYNCHRONIZED
2011};
2012
2013#if API >= 11
2014/* This approach serves two purposes.
2015 * First, we don't want to do an atomic operation to manipulate the reference
2016 * count every time something is bound/unbound to the pipeline, since they are
2017 * expensive.
2018 * Fortunately, the immediate context can only be used by a single thread, so
2019 * we don't have to use them, as long as a separate reference count is used
2020 * (see dual_refcnt_t).
2021 *
2022 * Second, we want to avoid the Device -> DeviceContext -> bound DeviceChild -> Device
2023 * garbage cycle.
2024 * To avoid it, DeviceChild doesn't hold a reference to Device as usual, but adds
2025 * one for each external reference count, while internal nonatomic_add_ref doesn't
2026 * add any.
2027 *
 * Note that ideally we would like to eliminate the non-atomic op too, but this is more
 * complicated, since we would either need to use garbage collection and give up
 * deterministic destruction (especially bad for large textures), or scan the whole
 * pipeline state every time the reference count of an object drops to 0, which risks
 * pathological slowdowns.
2033 *
2034 * Since this microoptimization should matter relatively little, let's avoid it for now.
2035 *
 * Note that deferred contexts don't use this, since as a whole, they must be thread-safe.
 * Eliminating the atomic ops for deferred contexts seems substantially harder.
 * This might be a problem if they are used in a one-shot multithreaded rendering
 * fashion, where SMP cacheline bouncing on the reference count may be visible.
2040 *
2041 * The idea would be to attach a structure of reference counts indexed by deferred
2042 * context id to each object. Ideally, this should be organized like ext2 block pointers.
2043 *
2044 * Every deferred context would get a reference count in its own cacheline.
2045 * The external count is protected by a lock bit, and there is also a "lock bit" in each
2046 * internal count.
2047 *
2048 * When the external count has to be dropped to 0, the lock bit is taken and all internal
2049 * reference counts are scanned, taking a count of them. A flag would also be set on them.
2050 * Deferred context manipulation would notice the flag, and update the count.
2051 * Once the count goes to zero, the object is freed.
2052 *
2053 * The problem of this is that if the external reference count ping-pongs between
2054 * zero and non-zero, the scans will take a lot of time.
2055 *
2056 * The idea to solve this is to compute the scans in a binary-tree like fashion, where
2057 * each binary tree node would have a "determined bit", which would be invalidated
2058 * by manipulations.
2059 *
2060 * However, all this complexity might actually be a loss in most cases, so let's just
2061 * stick to a single atomic refcnt for now.
2062 *
2063 * Also, we don't even support deferred contexts yet, so this can wait.
2064 */
2065struct nonatomic_device_child_ptr_traits
2066{
2067	static void add_ref(void* p)
2068	{
2069		if(p)
2070			((GalliumD3D11DeviceChild<>*)p)->nonatomic_add_ref();
2071	}
2072
2073	static void release(void* p)
2074	{
2075		if(p)
2076			((GalliumD3D11DeviceChild<>*)p)->nonatomic_release();
2077	}
2078};
2079
2080struct GalliumD3D11ImmediateDeviceContext
2081	: public GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>
2082{
2083	GalliumD3D11ImmediateDeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, unsigned context_flags = 0)
2084	: GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>(device, pipe, context_flags)
2085	{
2086		// not necessary, but tests that the API at least basically works
2087		ClearState();
2088	}
2089
2090	/* we do this since otherwise we would have a garbage cycle between this and the device */
2091	virtual ULONG STDMETHODCALLTYPE AddRef()
2092	{
2093		return this->device->AddRef();
2094	}
2095
2096	virtual ULONG STDMETHODCALLTYPE Release()
2097	{
2098		return this->device->Release();
2099	}
2100
2101	virtual D3D11_DEVICE_CONTEXT_TYPE STDMETHODCALLTYPE GetType()
2102	{
2103		return D3D11_DEVICE_CONTEXT_IMMEDIATE;
2104	}
2105};
2106
2107static ID3D11DeviceContext* GalliumD3D11ImmediateDeviceContext_Create(GalliumD3D11Screen* device, struct pipe_context* pipe, bool owns_pipe)
2108{
2109	return new GalliumD3D11ImmediateDeviceContext(device, pipe, owns_pipe);
2110}
2111
2112static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumState(ID3D11DeviceContext* context)
2113{
2114	((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumState();
2115}
2116
2117static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumStateBlitOnly(ID3D11DeviceContext* context)
2118{
2119	((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumStateBlitOnly();
2120}
2121
2122static void GalliumD3D11ImmediateDeviceContext_Destroy(ID3D11DeviceContext* context)
2123{
2124	delete (GalliumD3D11ImmediateDeviceContext*)context;
2125}
2126#endif
2127