d3d11_context.h revision 3e0f57b6401e7ddd0a5dc89b5b7fdd6c8d85818d
1/**************************************************************************
2 *
3 * Copyright 2010 Luca Barbieri
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
/* used to unbind things, we need 128 due to resources
 * (presumably the largest per-stage slot count that may need unbinding -- TODO confirm)
 */
static const void* zero_data[128];

/* Layout of the update_flags dirty mask:
 *  - bit (UPDATE_VIEWS_SHIFT + stage):    shader resource views of that stage changed
 *  - bit (UPDATE_SAMPLERS_SHIFT + stage): samplers of that stage changed
 *  - UPDATE_VERTEX_BUFFERS:               vertex buffer bindings changed
 */
#define UPDATE_VIEWS_SHIFT (D3D11_STAGES * 0)
#define UPDATE_SAMPLERS_SHIFT (D3D11_STAGES * 1)
#define UPDATE_VERTEX_BUFFERS (1 << (D3D11_STAGES * 2))
33
34#if API >= 11
35template<typename PtrTraits>
36struct GalliumD3D11DeviceContext :
37	public GalliumD3D11DeviceChild<ID3D11DeviceContext>
38{
39#else
40template<bool threadsafe>
41struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe>
42{
43	typedef simple_ptr_traits PtrTraits;
44	typedef GalliumD3D10Device GalliumD3D10DeviceContext;
45#endif
46
47	refcnt_ptr<GalliumD3D11Shader<>, PtrTraits> shaders[D3D11_STAGES];
48	refcnt_ptr<GalliumD3D11InputLayout, PtrTraits> input_layout;
49	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> index_buffer;
50	refcnt_ptr<GalliumD3D11RasterizerState, PtrTraits> rasterizer_state;
51	refcnt_ptr<GalliumD3D11DepthStencilState, PtrTraits> depth_stencil_state;
52	refcnt_ptr<GalliumD3D11BlendState, PtrTraits> blend_state;
53	refcnt_ptr<GalliumD3D11DepthStencilView, PtrTraits> depth_stencil_view;
54	refcnt_ptr<GalliumD3D11Predicate, PtrTraits> render_predicate;
55
56	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> constant_buffers[D3D11_STAGES][D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT];
57	refcnt_ptr<GalliumD3D11ShaderResourceView, PtrTraits> shader_resource_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
58	refcnt_ptr<GalliumD3D11SamplerState, PtrTraits> samplers[D3D11_STAGES][D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
59	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> input_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
60	refcnt_ptr<GalliumD3D11RenderTargetView, PtrTraits> render_target_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
61	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> so_targets[D3D11_SO_BUFFER_SLOT_COUNT];
62
63#if API >= 11
64	refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> cs_unordered_access_views[D3D11_PS_CS_UAV_REGISTER_COUNT];
65	refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> om_unordered_access_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
66#endif
67
68	D3D11_VIEWPORT viewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
69	D3D11_RECT scissor_rects[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
70	unsigned so_offsets[D3D11_SO_BUFFER_SLOT_COUNT];
71	D3D11_PRIMITIVE_TOPOLOGY primitive_topology;
72	DXGI_FORMAT index_format;
73	unsigned index_offset;
74	BOOL render_predicate_value;
75	float blend_color[4];
76	unsigned sample_mask;
77	unsigned stencil_ref;
78	bool depth_clamp;
79
80	void* default_input_layout;
81	void* default_rasterizer;
82	void* default_depth_stencil;
83	void* default_blend;
84	void* default_sampler;
85	void* ld_sampler;
86	void * default_shaders[D3D11_STAGES];
87
88	// derived state
89	int primitive_mode;
90	struct pipe_vertex_buffer vertex_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
91	struct pipe_resource* so_buffers[D3D11_SO_BUFFER_SLOT_COUNT];
92	struct pipe_sampler_view* sampler_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
93	struct
94	{
95		void* ld; // accessed with a -1 index from v
96		void* v[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
97	} sampler_csos[D3D11_STAGES];
98	struct pipe_resource * buffers[D3D11_SO_BUFFER_SLOT_COUNT];
99	unsigned num_shader_resource_views[D3D11_STAGES];
100	unsigned num_samplers[D3D11_STAGES];
101	unsigned num_vertex_buffers;
102	unsigned num_render_target_views;
103	unsigned num_viewports;
104	unsigned num_scissor_rects;
105	unsigned num_so_targets;
106
107	struct pipe_context* pipe;
108	unsigned update_flags;
109
110	bool owns_pipe;
111	unsigned context_flags;
112
113	GalliumD3D11Caps caps;
114
115	cso_context* cso_ctx;
116	gen_mipmap_state* gen_mipmap;
117
118#if API >= 11
119#define SYNCHRONIZED do {} while(0)
120
121	GalliumD3D11DeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, bool owns_pipe, unsigned context_flags = 0)
122	: GalliumD3D11DeviceChild<ID3D11DeviceContext>(device), pipe(pipe), owns_pipe(owns_pipe), context_flags(context_flags)
123	{
124		caps = device->screen_caps;
125		init_context();
126	}
127
	/* Releases everything init_context() created (and the pipe, if owned). */
	~GalliumD3D11DeviceContext()
	{
		destroy_context();
	}
132#else
133#define SYNCHRONIZED lock_t<maybe_mutex_t<threadsafe> > lock_(this->mutex)
134
135	GalliumD3D10Device(pipe_screen* screen, pipe_context* pipe, bool owns_pipe, unsigned creation_flags, IDXGIAdapter* adapter)
136	: GalliumD3D10ScreenImpl<threadsafe>(screen, pipe, owns_pipe, creation_flags, adapter), pipe(pipe), owns_pipe(owns_pipe), context_flags(0)
137	{
138		caps = this->screen_caps;
139		init_context();
140	}
141
	/* Releases everything init_context() created (and the pipe, if owned). */
	~GalliumD3D10Device()
	{
		destroy_context();
	}
146#endif
147
	/* Initializes the context: trims caps to what the pipe implements, resets
	 * all cached pipeline and derived state, creates the default CSOs
	 * (input layout, rasterizer, depth/stencil, blend, samplers, empty shaders),
	 * creates the cso context and mipmap generator, and finally calls
	 * RestoreGalliumState().
	 * Expects pipe and caps to have been set by the constructor.
	 */
	void init_context()
	{
		// disable capabilities whose pipe entry points are missing
		if(!pipe->begin_query)
			caps.queries = false;
		if(!pipe->render_condition)
			caps.render_condition = false;
		if(!pipe->bind_gs_state)
		{
			caps.gs = false;
			caps.stages = 2;
		}
		if(!pipe->set_stream_output_buffers)
			caps.so = false;

		update_flags = 0;

		// pipeline state
		memset(viewports, 0, sizeof(viewports));
		memset(scissor_rects, 0, sizeof(scissor_rects));
		memset(so_offsets, 0, sizeof(so_offsets));
		primitive_topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
		index_format = DXGI_FORMAT_UNKNOWN;
		index_offset = 0;
		render_predicate_value = 0;
		memset(blend_color, 0, sizeof(blend_color));
		sample_mask = ~0;
		stencil_ref = 0;
		depth_clamp = 0;

		// derived state
		primitive_mode = 0;
		memset(vertex_buffers, 0, sizeof(vertex_buffers));
		memset(so_buffers, 0, sizeof(so_buffers));
		memset(sampler_views, 0, sizeof(sampler_views));
		memset(sampler_csos, 0, sizeof(sampler_csos));
		memset(num_shader_resource_views, 0, sizeof(num_shader_resource_views));
		memset(num_samplers, 0, sizeof(num_samplers));
		num_vertex_buffers = 0;
		num_render_target_views = 0;
		num_viewports = 0;
		num_scissor_rects = 0;
		num_so_targets = 0;

		// default input layout: no vertex elements
		default_input_layout = pipe->create_vertex_elements_state(pipe, 0, 0);

		// default rasterizer: back-face culling, everything else zeroed
		struct pipe_rasterizer_state rasterizerd;
		memset(&rasterizerd, 0, sizeof(rasterizerd));
		rasterizerd.gl_rasterization_rules = 1;
		rasterizerd.cull_face = PIPE_FACE_BACK;
		default_rasterizer = pipe->create_rasterizer_state(pipe, &rasterizerd);

		// default depth/stencil: depth test LESS with depth writes, stencil zeroed
		struct pipe_depth_stencil_alpha_state depth_stencild;
		memset(&depth_stencild, 0, sizeof(depth_stencild));
		depth_stencild.depth.enabled = TRUE;
		depth_stencild.depth.writemask = 1;
		depth_stencild.depth.func = PIPE_FUNC_LESS;
		default_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &depth_stencild);

		// default blend: all four channels of rt[0] writable, everything else zeroed
		struct pipe_blend_state blendd;
		memset(&blendd, 0, sizeof(blendd));
		blendd.rt[0].colormask = 0xf;
		default_blend = pipe->create_blend_state(pipe, &blendd);

		// default sampler: linear filtering, clamp-to-edge, normalized coordinates
		struct pipe_sampler_state samplerd;
		memset(&samplerd, 0, sizeof(samplerd));
		samplerd.normalized_coords = 1;
		samplerd.min_img_filter = PIPE_TEX_FILTER_LINEAR;
		samplerd.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
		samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR;
		samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
		samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
		samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
		samplerd.border_color[0] = 1.0f;
		samplerd.border_color[1] = 1.0f;
		samplerd.border_color[2] = 1.0f;
		samplerd.border_color[3] = 1.0f;
		samplerd.min_lod = -FLT_MAX;
		samplerd.max_lod = FLT_MAX;
		samplerd.max_anisotropy = 1;
		default_sampler = pipe->create_sampler_state(pipe, &samplerd);

		// "ld" sampler: nearest filtering, unnormalized coordinates, clamp-to-border
		memset(&samplerd, 0, sizeof(samplerd));
		samplerd.normalized_coords = 0;
		samplerd.min_img_filter = PIPE_TEX_FILTER_NEAREST;
		samplerd.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
		samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
		samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
		samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
		samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
		samplerd.min_lod = -FLT_MAX;
		samplerd.max_lod = FLT_MAX;
		samplerd.max_anisotropy = 1;
		ld_sampler = pipe->create_sampler_state(pipe, &samplerd);

		// every stage starts with the ld sampler in the "-1" slot and the
		// default sampler in all regular slots
		for(unsigned s = 0; s < D3D11_STAGES; ++s)
		{
			sampler_csos[s].ld = ld_sampler;
			for(unsigned i = 0; i < D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; ++i)
				sampler_csos[s].v[i] = default_sampler;
		}

		// TODO: should this really be empty shaders, or should they be all-passthrough?
		// only the fragment and vertex stages get a default shader; the rest stay NULL
		memset(default_shaders, 0, sizeof(default_shaders));
		struct ureg_program *ureg;
		ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
		ureg_END(ureg);
		default_shaders[PIPE_SHADER_FRAGMENT] = ureg_create_shader_and_destroy(ureg, pipe);

		ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
		ureg_END(ureg);
		default_shaders[PIPE_SHADER_VERTEX] = ureg_create_shader_and_destroy(ureg, pipe);

		cso_ctx = cso_create_context(pipe);
		gen_mipmap = util_create_gen_mipmap(pipe, cso_ctx);

		RestoreGalliumState();
	}
265
	/* Tears down what init_context() created: the mipmap generator, the cso
	 * context and every default CSO. Each kind of state is unbound from the
	 * pipe (by binding 0) before the matching default object is deleted.
	 * The pipe itself is destroyed last, and only if this context owns it.
	 */
	void destroy_context()
	{
		util_destroy_gen_mipmap(gen_mipmap);
		cso_destroy_context(cso_ctx);

		pipe->bind_vertex_elements_state(pipe, 0);
		pipe->delete_vertex_elements_state(pipe, default_input_layout);

		pipe->bind_rasterizer_state(pipe, 0);
		pipe->delete_rasterizer_state(pipe, default_rasterizer);

		pipe->bind_depth_stencil_alpha_state(pipe, 0);
		pipe->delete_depth_stencil_alpha_state(pipe, default_depth_stencil);

		pipe->bind_blend_state(pipe, 0);
		pipe->delete_blend_state(pipe, default_blend);

		pipe->bind_fragment_sampler_states(pipe, 0, 0);
		pipe->bind_vertex_sampler_states(pipe, 0, 0);
		// geometry sampler binding is optional in the pipe
		if(pipe->bind_geometry_sampler_states)
			pipe->bind_geometry_sampler_states(pipe, 0, 0);
		pipe->delete_sampler_state(pipe, default_sampler);
		pipe->delete_sampler_state(pipe, ld_sampler);

		pipe->bind_fs_state(pipe, 0);
		pipe->delete_fs_state(pipe, default_shaders[PIPE_SHADER_FRAGMENT]);

		pipe->bind_vs_state(pipe, 0);
		pipe->delete_vs_state(pipe, default_shaders[PIPE_SHADER_VERTEX]);

		if(owns_pipe)
			pipe->destroy(pipe);
	}
299
	/* Returns the context flags stored at construction time. */
	virtual unsigned STDMETHODCALLTYPE GetContextFlags(void)
	{
		return context_flags;
	}
304#if API >= 11
305#define SET_SHADER_EXTRA_ARGS , \
306	ID3D11ClassInstance *const *ppClassInstances, \
307	unsigned count
308#define GET_SHADER_EXTRA_ARGS , \
309		ID3D11ClassInstance **ppClassInstances, \
310		unsigned *out_count
311#else
312#define SET_SHADER_EXTRA_ARGS
313#define GET_SHADER_EXTRA_ARGS
314#endif
315
316/* On Windows D3D11, SetConstantBuffers and SetShaderResources crash if passed a null pointer.
317 * Instead, you have to pass a pointer to nulls to unbind things.
318 * We do the same.
319 * TODO: is D3D10 the same?
320 */
321	template<unsigned s>
322	void xs_set_shader(GalliumD3D11Shader<>* shader)
323	{
324		if(shader != shaders[s].p)
325		{
326			shaders[s] = shader;
327			void* shader_cso = shader ? shader->object : default_shaders[s];
328			switch(s)
329			{
330			case PIPE_SHADER_VERTEX:
331				pipe->bind_vs_state(pipe, shader_cso);
332				break;
333			case PIPE_SHADER_FRAGMENT:
334				pipe->bind_fs_state(pipe, shader_cso);
335				break;
336			case PIPE_SHADER_GEOMETRY:
337				pipe->bind_gs_state(pipe, shader_cso);
338				break;
339			}
340			update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s)) | (1 << (UPDATE_VIEWS_SHIFT + s));
341		}
342	}
343
344	template<unsigned s>
345	void xs_set_constant_buffers(unsigned start, unsigned count, GalliumD3D11Buffer *const *constbufs)
346	{
347		for(unsigned i = 0; i < count; ++i)
348		{
349			if(constbufs[i] != constant_buffers[s][i].p)
350			{
351				constant_buffers[s][i] = constbufs[i];
352				if(s < caps.stages && start + i < caps.constant_buffers[s])
353					pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL);
354			}
355		}
356	}
357
358	template<unsigned s>
359	void xs_set_shader_resources(unsigned start, unsigned count, GalliumD3D11ShaderResourceView *const *srvs)
360	{
361		int last_different = -1;
362		for(unsigned i = 0; i < count; ++i)
363		{
364			if(shader_resource_views[s][start + i].p != srvs[i])
365			{
366				shader_resource_views[s][start + i] = srvs[i];
367				sampler_views[s][start + i] = srvs[i] ? srvs[i]->object : 0;
368				last_different = i;
369			}
370		}
371		if(last_different >= 0)
372		{
373			num_shader_resource_views[s] = std::max(num_shader_resource_views[s], start + last_different + 1);
374			update_flags |= 1 << (UPDATE_VIEWS_SHIFT + s);
375		}
376	}
377
378	template<unsigned s>
379	void xs_set_samplers(unsigned start, unsigned count, GalliumD3D11SamplerState *const *samps)
380	{
381		int last_different = -1;
382		for(unsigned i = 0; i < count; ++i)
383		{
384			if(samplers[s][start + i].p != samps[i])
385			{
386				samplers[s][start + i] = samps[i];
387				sampler_csos[s].v[start + i] = samps[i] ? samps[i]->object : default_sampler;
388			}
389			if(last_different >= 0)
390			{
391				num_samplers[s] = std::max(num_samplers[s], start + last_different + 1);
392				update_flags |= (UPDATE_SAMPLERS_SHIFT + s);
393			}
394		}
395	}
396
/* Generates the per-stage device-context entry points for one shader stage.
 *   XS    - stage prefix used in the method names and in D3D11_STAGE_##XS (e.g. VS)
 *   Stage - stage name used in the interface type ID3D11##Stage##Shader (e.g. Vertex)
 * Setters forward to the xs_set_* templates above. Getters hand out the
 * cached objects through refcnt_ptr::ref() (presumably adding a reference
 * for the caller -- confirm against refcnt_ptr).
 * start/count are not range-checked here, and the class-instance arguments
 * added by SET/GET_SHADER_EXTRA_ARGS are ignored.
 */
#define IMPLEMENT_SHADER_STAGE(XS, Stage) \
	virtual void STDMETHODCALLTYPE XS##SetShader( \
		ID3D11##Stage##Shader *pShader \
		SET_SHADER_EXTRA_ARGS) \
	{ \
		SYNCHRONIZED; \
		xs_set_shader<D3D11_STAGE_##XS>((GalliumD3D11Shader<>*)pShader); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetShader(\
		ID3D11##Stage##Shader **ppShader \
		GET_SHADER_EXTRA_ARGS) \
	{ \
		SYNCHRONIZED; \
		*ppShader = (ID3D11##Stage##Shader*)shaders[D3D11_STAGE_##XS].ref(); \
	} \
	virtual void STDMETHODCALLTYPE XS##SetConstantBuffers(\
		unsigned start, \
		unsigned count, \
		ID3D11Buffer *const* constant_buffers) \
	{ \
		SYNCHRONIZED; \
		xs_set_constant_buffers<D3D11_STAGE_##XS>(start, count, (GalliumD3D11Buffer *const *)constant_buffers); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetConstantBuffers(\
		unsigned start, \
		unsigned count, \
		ID3D11Buffer **out_constant_buffers) \
	{ \
		SYNCHRONIZED; \
		for(unsigned i = 0; i < count; ++i) \
			out_constant_buffers[i] = constant_buffers[D3D11_STAGE_##XS][start + i].ref(); \
	} \
	virtual void STDMETHODCALLTYPE XS##SetShaderResources(\
		unsigned start, \
		unsigned count, \
		ID3D11ShaderResourceView *const *new_shader_resource_views) \
	{ \
		SYNCHRONIZED; \
		xs_set_shader_resources<D3D11_STAGE_##XS>(start, count, (GalliumD3D11ShaderResourceView *const *)new_shader_resource_views); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetShaderResources(\
		unsigned start, \
		unsigned count, \
		ID3D11ShaderResourceView **out_shader_resource_views) \
	{ \
		SYNCHRONIZED; \
		for(unsigned i = 0; i < count; ++i) \
			out_shader_resource_views[i] = shader_resource_views[D3D11_STAGE_##XS][start + i].ref(); \
	} \
	virtual void STDMETHODCALLTYPE XS##SetSamplers(\
		unsigned start, \
		unsigned count, \
		ID3D11SamplerState *const *new_samplers) \
	{ \
		SYNCHRONIZED; \
		xs_set_samplers<D3D11_STAGE_##XS>(start, count, (GalliumD3D11SamplerState *const *)new_samplers); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetSamplers( \
		unsigned start, \
		unsigned count, \
		ID3D11SamplerState **out_samplers) \
	{ \
		SYNCHRONIZED; \
		for(unsigned i = 0; i < count; ++i) \
			out_samplers[i] = samplers[D3D11_STAGE_##XS][start + i].ref(); \
	}
463
/* Per-stage statement wrappers: VS and PS always execute x, GS only when the
 * pipe supports geometry shaders (caps.gs), and HS/DS/CS expand to nothing.
 */
#define DO_VS(x) x
#define DO_GS(x) do {if(caps.gs) {x;}} while(0)
#define DO_PS(x) x
#define DO_HS(x)
#define DO_DS(x)
#define DO_CS(x)
	// entry points for the stages that exist in both D3D10 and D3D11
	IMPLEMENT_SHADER_STAGE(VS, Vertex)
	IMPLEMENT_SHADER_STAGE(GS, Geometry)
	IMPLEMENT_SHADER_STAGE(PS, Pixel)

#if API >= 11
	// entry points for the D3D11-only stages
	IMPLEMENT_SHADER_STAGE(HS, Hull)
	IMPLEMENT_SHADER_STAGE(DS, Domain)
	IMPLEMENT_SHADER_STAGE(CS, Compute)
478
479	virtual void STDMETHODCALLTYPE CSSetUnorderedAccessViews(
480		unsigned start,
481		unsigned count,
482		ID3D11UnorderedAccessView *const *new_unordered_access_views,
483		const unsigned *new_uav_initial_counts)
484	{
485		SYNCHRONIZED;
486		for(unsigned i = 0; i < count; ++i)
487			cs_unordered_access_views[start + i] = new_unordered_access_views[i];
488	}
489
490	virtual void STDMETHODCALLTYPE CSGetUnorderedAccessViews(
491		unsigned start,
492		unsigned count,
493		ID3D11UnorderedAccessView **out_unordered_access_views)
494	{
495		SYNCHRONIZED;
496		for(unsigned i = 0; i < count; ++i)
497			out_unordered_access_views[i] = cs_unordered_access_views[start + i].ref();
498	}
499#endif
500
501	template<unsigned s>
502	void update_stage()
503	{
504		if(update_flags & (1 << (UPDATE_VIEWS_SHIFT + s)))
505		{
506			while(num_shader_resource_views[s] && !sampler_views[s][num_shader_resource_views[s] - 1]) \
507				--num_shader_resource_views[s];
508			if(s < caps.stages)
509			{
510				struct pipe_sampler_view* views_to_bind[PIPE_MAX_SAMPLERS];
511				unsigned num_views_to_bind = shaders[s] ? shaders[s]->slot_to_resource.size() : 0;
512				for(unsigned i = 0; i < num_views_to_bind; ++i)
513				{
514					views_to_bind[i] = sampler_views[s][shaders[s]->slot_to_resource[i]];
515				}
516				switch(s)
517				{
518				case PIPE_SHADER_VERTEX:
519					pipe->set_vertex_sampler_views(pipe, num_views_to_bind, views_to_bind);
520					break;
521				case PIPE_SHADER_FRAGMENT:
522					pipe->set_fragment_sampler_views(pipe, num_views_to_bind, views_to_bind);
523					break;
524				case PIPE_SHADER_GEOMETRY:
525					pipe->set_geometry_sampler_views(pipe, num_views_to_bind, views_to_bind);
526					break;
527				}
528			}
529		}
530
531		if(update_flags & (1 << (UPDATE_SAMPLERS_SHIFT + s)))
532		{
533			while(num_samplers[s] && !sampler_csos[s].v[num_samplers[s] - 1])
534				--num_samplers[s];
535			if(s < caps.stages)
536			{
537				void* samplers_to_bind[PIPE_MAX_SAMPLERS];
538				unsigned num_samplers_to_bind = shaders[s] ? shaders[s]->slot_to_sampler.size() : 0;
539				for(unsigned i = 0; i < num_samplers_to_bind; ++i)
540				{
541					// index can be -1 to access sampler_csos[s].ld
542					samplers_to_bind[i] = *(sampler_csos[s].v + shaders[s]->slot_to_sampler[i]);
543				}
544				switch(s)
545				{
546				case PIPE_SHADER_VERTEX:
547					pipe->bind_vertex_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
548					break;
549				case PIPE_SHADER_FRAGMENT:
550					pipe->bind_fragment_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
551					break;
552				case PIPE_SHADER_GEOMETRY:
553					pipe->bind_geometry_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
554					break;
555				}
556			}
557		}
558	}
559
560	void update_state()
561	{
562		update_stage<D3D11_STAGE_PS>();
563		update_stage<D3D11_STAGE_VS>();
564		update_stage<D3D11_STAGE_GS>();
565#if API >= 11
566		update_stage<D3D11_STAGE_HS>();
567		update_stage<D3D11_STAGE_DS>();
568		update_stage<D3D11_STAGE_CS>();
569#endif
570
571		if(update_flags & UPDATE_VERTEX_BUFFERS)
572		{
573			while(num_vertex_buffers && !vertex_buffers[num_vertex_buffers - 1].buffer)
574				--num_vertex_buffers;
575			pipe->set_vertex_buffers(pipe, num_vertex_buffers, vertex_buffers);
576		}
577
578		update_flags = 0;
579	}
580
581	virtual void STDMETHODCALLTYPE IASetInputLayout(
582		ID3D11InputLayout *new_input_layout)
583	{
584		SYNCHRONIZED;
585		if(new_input_layout != input_layout.p)
586		{
587			input_layout = new_input_layout;
588			pipe->bind_vertex_elements_state(pipe, new_input_layout ? ((GalliumD3D11InputLayout*)new_input_layout)->object : default_input_layout);
589		}
590	}
591
	/* Returns the current input layout (may be NULL) via refcnt_ptr::ref().
	 * out_input_layout must not be NULL.
	 */
	virtual void STDMETHODCALLTYPE IAGetInputLayout(
		ID3D11InputLayout **out_input_layout)
	{
		SYNCHRONIZED;
		*out_input_layout = input_layout.ref();
	}
598
599	virtual void STDMETHODCALLTYPE IASetVertexBuffers(
600		unsigned start,
601		unsigned count,
602		ID3D11Buffer *const *new_vertex_buffers,
603		const unsigned *new_strides,
604		const unsigned *new_offsets)
605	{
606		SYNCHRONIZED;
607		int last_different = -1;
608		for(unsigned i = 0; i < count; ++i)
609		{
610			ID3D11Buffer* buffer = new_vertex_buffers[i];
611			if(buffer != input_buffers[start + i].p
612				|| vertex_buffers[start + i].buffer_offset != new_offsets[i]
613				|| vertex_buffers[start + i].stride != new_offsets[i]
614			)
615			{
616				input_buffers[start + i] = buffer;
617				vertex_buffers[start + i].buffer = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
618				vertex_buffers[start + i].buffer_offset = new_offsets[i];
619				vertex_buffers[start + i].stride = new_strides[i];
620				vertex_buffers[start + i].max_index = ~0;
621				last_different = i;
622			}
623		}
624		if(last_different >= 0)
625		{
626			num_vertex_buffers = std::max(num_vertex_buffers, start + count);
627			update_flags |= UPDATE_VERTEX_BUFFERS;
628		}
629	}
630
631	virtual void STDMETHODCALLTYPE IAGetVertexBuffers(
632		unsigned start,
633		unsigned count,
634		ID3D11Buffer **out_vertex_buffers,
635		unsigned *out_strides,
636		unsigned *out_offsets)
637	{
638		SYNCHRONIZED;
639		if(out_vertex_buffers)
640		{
641			for(unsigned i = 0; i < count; ++i)
642				out_vertex_buffers[i] = input_buffers[start + i].ref();
643		}
644
645		if(out_offsets)
646		{
647			for(unsigned i = 0; i < count; ++i)
648				out_offsets[i] = vertex_buffers[start + i].buffer_offset;
649		}
650
651		if(out_strides)
652		{
653			for(unsigned i = 0; i < count; ++i)
654				out_strides[i] = vertex_buffers[start + i].stride;
655		}
656	}
657
658	void set_index_buffer()
659	{
660		pipe_index_buffer ib;
661		if(!index_buffer)
662		{
663			memset(&ib, 0, sizeof(ib));
664		}
665		else
666		{
667			if(index_format == DXGI_FORMAT_R32_UINT)
668				ib.index_size = 4;
669			else if(index_format == DXGI_FORMAT_R16_UINT)
670				ib.index_size = 2;
671			else
672				ib.index_size = 1;
673			ib.offset = index_offset;
674			ib.buffer = index_buffer ? ((GalliumD3D11Buffer*)index_buffer.p)->resource : 0;
675		}
676		pipe->set_index_buffer(pipe, &ib);
677	}
678
679	virtual void STDMETHODCALLTYPE IASetIndexBuffer(
680		ID3D11Buffer *new_index_buffer,
681		DXGI_FORMAT new_index_format,
682		unsigned new_index_offset)
683	{
684		SYNCHRONIZED;
685		if(index_buffer.p != new_index_buffer || index_format != new_index_format || index_offset != new_index_offset)
686		{
687			index_buffer = new_index_buffer;
688			index_format = new_index_format;
689			index_offset = new_index_offset;
690
691			set_index_buffer();
692		}
693	}
694
695	virtual void STDMETHODCALLTYPE IAGetIndexBuffer(
696		ID3D11Buffer **out_index_buffer,
697		DXGI_FORMAT *out_index_format,
698		unsigned *out_index_offset)
699	{
700		SYNCHRONIZED;
701		if(out_index_buffer)
702			*out_index_buffer = index_buffer.ref();
703		if(out_index_format)
704			*out_index_format = index_format;
705		if(out_index_offset)
706			*out_index_offset = index_offset;
707	}
708
709	virtual void STDMETHODCALLTYPE IASetPrimitiveTopology(
710		D3D11_PRIMITIVE_TOPOLOGY new_primitive_topology)
711	{
712		SYNCHRONIZED;
713		if(primitive_topology != new_primitive_topology)
714		{
715			if(new_primitive_topology < D3D_PRIMITIVE_TOPOLOGY_COUNT)
716				primitive_mode = d3d_to_pipe_prim[new_primitive_topology];
717			else
718				primitive_mode = 0;
719			primitive_topology = new_primitive_topology;
720		}
721	}
722
	/* Returns the topology last set with IASetPrimitiveTopology()
	 * (D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED after init_context()).
	 * out_primitive_topology must not be NULL.
	 */
	virtual void STDMETHODCALLTYPE IAGetPrimitiveTopology(
		D3D11_PRIMITIVE_TOPOLOGY *out_primitive_topology)
	{
		SYNCHRONIZED;
		*out_primitive_topology = primitive_topology;
	}
729
730	virtual void STDMETHODCALLTYPE DrawIndexed(
731		unsigned index_count,
732		unsigned start_index_location,
733		int base_vertex_location)
734	{
735		SYNCHRONIZED;
736		if(update_flags)
737			update_state();
738
739		pipe_draw_info info;
740		info.mode = primitive_mode;
741		info.indexed = TRUE;
742		info.count = index_count;
743		info.start = start_index_location;
744		info.index_bias = base_vertex_location;
745		info.min_index = 0;
746		info.max_index = ~0;
747		info.start_instance = 0;
748		info.instance_count = 1;
749
750		pipe->draw_vbo(pipe, &info);
751	}
752
753	virtual void STDMETHODCALLTYPE Draw(
754		unsigned vertex_count,
755		unsigned start_vertex_location)
756	{
757		SYNCHRONIZED;
758		if(update_flags)
759			update_state();
760
761		pipe_draw_info info;
762		info.mode = primitive_mode;
763		info.indexed = FALSE;
764		info.count = vertex_count;
765		info.start = start_vertex_location;
766		info.index_bias = 0;
767		info.min_index = 0;
768		info.max_index = ~0;
769		info.start_instance = 0;
770		info.instance_count = 1;
771
772		pipe->draw_vbo(pipe, &info);
773	}
774
775	virtual void STDMETHODCALLTYPE DrawIndexedInstanced(
776		unsigned index_countPerInstance,
777		unsigned instance_count,
778		unsigned start_index_location,
779		int base_vertex_location,
780		unsigned start_instance_location)
781	{
782		SYNCHRONIZED;
783		if(update_flags)
784			update_state();
785
786		pipe_draw_info info;
787		info.mode = primitive_mode;
788		info.indexed = TRUE;
789		info.count = index_countPerInstance;
790		info.start = start_index_location;
791		info.index_bias = base_vertex_location;
792		info.min_index = 0;
793		info.max_index = ~0;
794		info.start_instance = start_instance_location;
795		info.instance_count = instance_count;
796
797		pipe->draw_vbo(pipe, &info);
798	}
799
800	virtual void STDMETHODCALLTYPE DrawInstanced(
801		unsigned vertex_countPerInstance,
802		unsigned instance_count,
803		unsigned start_vertex_location,
804		unsigned start_instance_location)
805	{
806		SYNCHRONIZED;
807		if(update_flags)
808			update_state();
809
810		pipe_draw_info info;
811		info.mode = primitive_mode;
812		info.indexed = FALSE;
813		info.count = vertex_countPerInstance;
814		info.start = start_vertex_location;
815		info.index_bias = 0;
816		info.min_index = 0;
817		info.max_index = ~0;
818		info.start_instance = start_instance_location;
819		info.instance_count = instance_count;
820
821		pipe->draw_vbo(pipe, &info);
822	}
823
824	virtual void STDMETHODCALLTYPE DrawAuto(void)
825	{
826		if(!caps.so)
827			return;
828
829		SYNCHRONIZED;
830		if(update_flags)
831			update_state();
832
833		pipe->draw_stream_output(pipe, primitive_mode);
834	}
835
836	virtual void STDMETHODCALLTYPE DrawIndexedInstancedIndirect(
837		ID3D11Buffer *buffer,
838		unsigned aligned_byte_offset)
839	{
840		SYNCHRONIZED;
841		if(update_flags)
842			update_state();
843
844		struct {
845			unsigned count;
846			unsigned instance_count;
847			unsigned start;
848			unsigned index_bias;
849		} data;
850
851		pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data);
852
853		pipe_draw_info info;
854		info.mode = primitive_mode;
855		info.indexed = TRUE;
856		info.start = data.start;
857		info.count = data.count;
858		info.index_bias = data.index_bias;
859		info.min_index = 0;
860		info.max_index = ~0;
861		info.start_instance = 0;
862		info.instance_count = data.instance_count;
863
864		pipe->draw_vbo(pipe, &info);
865	}
866
867	virtual void STDMETHODCALLTYPE DrawInstancedIndirect(
868		ID3D11Buffer *buffer,
869		unsigned aligned_byte_offset)
870	{
871		SYNCHRONIZED;
872		if(update_flags)
873			update_state();
874
875		struct {
876			unsigned count;
877			unsigned instance_count;
878			unsigned start;
879		} data;
880
881		pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data);
882
883		pipe_draw_info info;
884		info.mode = primitive_mode;
885		info.indexed = FALSE;
886		info.start = data.start;
887		info.count = data.count;
888		info.index_bias = 0;
889		info.min_index = 0;
890		info.max_index = ~0;
891		info.start_instance = 0;
892		info.instance_count = data.instance_count;
893
894		pipe->draw_vbo(pipe, &info);
895	}
896
897#if API >= 11
	/* Compute dispatch: not implemented. The call is accepted and ignored;
	 * all three thread group counts are unused.
	 */
	virtual void STDMETHODCALLTYPE Dispatch(
		unsigned thread_group_count_x,
		unsigned thread_group_count_y,
		unsigned thread_group_count_z)
	{
// uncomment this when this is implemented
//		SYNCHRONIZED;
//		if(update_flags)
//			update_state();
	}
908
	/* Indirect compute dispatch: not implemented. The call is accepted and
	 * ignored; both arguments are unused.
	 */
	virtual void STDMETHODCALLTYPE DispatchIndirect(
		ID3D11Buffer *buffer,
		unsigned aligned_byte_offset)
	{
// uncomment this when this is implemented
//		SYNCHRONIZED;
//		if(update_flags)
//			update_state();
	}
918#endif
919
920	void set_clip()
921	{
922		pipe_clip_state clip;
923		clip.nr = 0;
924		clip.depth_clamp = depth_clamp;
925		pipe->set_clip_state(pipe, &clip);
926	}
927
928	virtual void STDMETHODCALLTYPE RSSetState(
929		ID3D11RasterizerState *new_rasterizer_state)
930	{
931		SYNCHRONIZED;
932		if(new_rasterizer_state != rasterizer_state.p)
933		{
934			rasterizer_state = new_rasterizer_state;
935			pipe->bind_rasterizer_state(pipe, new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->object : default_rasterizer);
936			bool new_depth_clamp = new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->depth_clamp : false;
937			if(depth_clamp != new_depth_clamp)
938			{
939				depth_clamp = new_depth_clamp;
940				set_clip();
941			}
942		}
943	}
944
	/* Returns the current rasterizer state (may be NULL) via refcnt_ptr::ref().
	 * out_rasterizer_state must not be NULL.
	 */
	virtual void STDMETHODCALLTYPE RSGetState(
		ID3D11RasterizerState **out_rasterizer_state)
	{
		SYNCHRONIZED;
		*out_rasterizer_state = rasterizer_state.ref();
	}
951
952	void set_viewport()
953	{
954		// TODO: is depth correct? it seems D3D10/11 uses a [-1,1]x[-1,1]x[0,1] cube
955		pipe_viewport_state viewport;
956		float half_width = viewports[0].Width * 0.5f;
957		float half_height = viewports[0].Height * 0.5f;
958
959		viewport.scale[0] = half_width;
960		viewport.scale[1] = -half_height;
961		viewport.scale[2] = (viewports[0].MaxDepth - viewports[0].MinDepth);
962		viewport.scale[3] = 1.0f;
963		viewport.translate[0] = half_width + viewports[0].TopLeftX;
964		viewport.translate[1] = half_height + viewports[0].TopLeftY;
965		viewport.translate[2] = viewports[0].MinDepth;
966		viewport.translate[3] = 1.0f;
967		pipe->set_viewport_state(pipe, &viewport);
968	}
969
970	virtual void STDMETHODCALLTYPE RSSetViewports(
971		unsigned count,
972		const D3D11_VIEWPORT *new_viewports)
973	{
974		SYNCHRONIZED;
975		if(count)
976		{
977			if(memcmp(&viewports[0], &new_viewports[0], sizeof(viewports[0])))
978			{
979				viewports[0] = new_viewports[0];
980				set_viewport();
981			}
982			for(unsigned i = 1; i < count; ++i)
983				viewports[i] = new_viewports[i];
984		}
985		else if(num_viewports)
986		{
987			// TODO: what should we do here?
988			memset(&viewports[0], 0, sizeof(viewports[0]));
989			set_viewport();
990		}
991		num_viewports = count;
992	}
993
994	virtual void STDMETHODCALLTYPE RSGetViewports(
995		unsigned *out_count,
996		D3D11_VIEWPORT *out_viewports)
997	{
998		SYNCHRONIZED;
999		if(out_viewports)
1000		{
1001			unsigned i;
1002			for(i = 0; i < std::min(*out_count, num_viewports); ++i)
1003				out_viewports[i] = viewports[i];
1004
1005			memset(out_viewports + i, 0, (*out_count - i) * sizeof(D3D11_VIEWPORT));
1006		}
1007
1008		*out_count = num_viewports;
1009	}
1010
1011	void set_scissor()
1012	{
1013		pipe_scissor_state scissor;
1014		scissor.minx = scissor_rects[0].left;
1015		scissor.miny = scissor_rects[0].top;
1016		scissor.maxx = scissor_rects[0].right;
1017		scissor.maxy = scissor_rects[0].bottom;
1018		pipe->set_scissor_state(pipe, &scissor);
1019	}
1020
1021	virtual void STDMETHODCALLTYPE RSSetScissorRects(
1022		unsigned count,
1023		const D3D11_RECT *new_rects)
1024	{
1025		SYNCHRONIZED;
1026		if(count)
1027		{
1028			if(memcmp(&scissor_rects[0], &new_rects[0], sizeof(scissor_rects[0])))
1029			{
1030				scissor_rects[0] = new_rects[0];
1031				set_scissor();
1032			}
1033			for(unsigned i = 1; i < count; ++i)
1034				scissor_rects[i] = new_rects[i];
1035		}
1036		else if(num_scissor_rects)
1037		{
1038			// TODO: what should we do here?
1039			memset(&scissor_rects[0], 0, sizeof(scissor_rects[0]));
1040			set_scissor();
1041		}
1042
1043		num_scissor_rects = count;
1044	}
1045
1046	virtual void STDMETHODCALLTYPE RSGetScissorRects(
1047		unsigned *out_count,
1048		D3D11_RECT *out_rects)
1049	{
1050		SYNCHRONIZED;
1051		if(out_rects)
1052		{
1053			unsigned i;
1054			for(i = 0; i < std::min(*out_count, num_scissor_rects); ++i)
1055				out_rects[i] = scissor_rects[i];
1056
1057			memset(out_rects + i, 0, (*out_count - i) * sizeof(D3D11_RECT));
1058		}
1059
1060		*out_count = num_scissor_rects;
1061	}
1062
1063	virtual void STDMETHODCALLTYPE OMSetBlendState(
1064		ID3D11BlendState *new_blend_state,
1065		const float new_blend_factor[4],
1066		unsigned new_sample_mask)
1067	{
1068		SYNCHRONIZED;
1069		float white[4] = {1.0f, 1.0f, 1.0f, 1.0f};
1070
1071		if(blend_state.p != new_blend_state)
1072		{
1073			pipe->bind_blend_state(pipe, new_blend_state ? ((GalliumD3D11BlendState*)new_blend_state)->object : default_blend);
1074			blend_state = new_blend_state;
1075		}
1076
1077		// Windows D3D11 does this, even though it's apparently undocumented
1078		if(!new_blend_factor)
1079			new_blend_factor = white;
1080
1081		if(memcmp(blend_color, new_blend_factor, sizeof(blend_color)))
1082		{
1083			pipe->set_blend_color(pipe, (struct pipe_blend_color*)new_blend_factor);
1084			memcpy(blend_color, new_blend_factor, sizeof(blend_color));
1085		}
1086
1087		if(sample_mask != new_sample_mask)
1088		{
1089			pipe->set_sample_mask(pipe, new_sample_mask);
1090			sample_mask = new_sample_mask;
1091		}
1092	}
1093
	/* Returns the cached blend state, blend factor and sample mask.
	 * Each output is optional: a NULL output pointer is simply skipped.
	 * The blend state is obtained through blend_state.ref().
	 */
	virtual void STDMETHODCALLTYPE OMGetBlendState(
		ID3D11BlendState **out_blend_state,
		float out_blend_factor[4],
		unsigned *out_sample_mask)
	{
		SYNCHRONIZED;
		if(out_blend_state)
			*out_blend_state = blend_state.ref();
		if(out_blend_factor)
			memcpy(out_blend_factor, blend_color, sizeof(blend_color));
		if(out_sample_mask)
			*out_sample_mask = sample_mask;
	}
1107
1108	void set_stencil_ref()
1109	{
1110		struct pipe_stencil_ref sref;
1111		sref.ref_value[0] = stencil_ref;
1112		sref.ref_value[1] = stencil_ref;
1113		pipe->set_stencil_ref(pipe, &sref);
1114	}
1115
1116	virtual void STDMETHODCALLTYPE OMSetDepthStencilState(
1117		ID3D11DepthStencilState *new_depth_stencil_state,
1118		unsigned new_stencil_ref)
1119	{
1120		SYNCHRONIZED;
1121		if(new_depth_stencil_state != depth_stencil_state.p)
1122		{
1123			pipe->bind_depth_stencil_alpha_state(pipe, new_depth_stencil_state ? ((GalliumD3D11DepthStencilState*)new_depth_stencil_state)->object : default_depth_stencil);
1124			depth_stencil_state = new_depth_stencil_state;
1125		}
1126
1127		if(new_stencil_ref != stencil_ref)
1128		{
1129			stencil_ref = new_stencil_ref;
1130			set_stencil_ref();
1131		}
1132	}
1133
1134	virtual void STDMETHODCALLTYPE OMGetDepthStencilState(
1135		ID3D11DepthStencilState **out_depth_stencil_state,
1136		unsigned *out_stencil_ref)
1137	{
1138		SYNCHRONIZED;
1139		if(*out_depth_stencil_state)
1140			*out_depth_stencil_state = depth_stencil_state.ref();
1141		if(out_stencil_ref)
1142			*out_stencil_ref = stencil_ref;
1143	}
1144
1145	void set_framebuffer()
1146	{
1147		struct pipe_framebuffer_state fb;
1148		memset(&fb, 0, sizeof(fb));
1149		if(depth_stencil_view)
1150		{
1151			struct pipe_surface* surf = ((GalliumD3D11DepthStencilView*)depth_stencil_view.p)->object;
1152			fb.zsbuf = surf;
1153			if(surf->width > fb.width)
1154				fb.width = surf->width;
1155			if(surf->height > fb.height)
1156				fb.height = surf->height;
1157		}
1158		fb.nr_cbufs = num_render_target_views;
1159		unsigned i;
1160		for(i = 0; i < num_render_target_views; ++i)
1161		{
1162			if(render_target_views[i])
1163			{
1164				struct pipe_surface* surf = ((GalliumD3D11RenderTargetView*)render_target_views[i].p)->object;
1165				fb.cbufs[i] = surf;
1166				if(surf->width > fb.width)
1167					fb.width = surf->width;
1168				if(surf->height > fb.height)
1169					fb.height = surf->height;
1170			}
1171		}
1172
1173		pipe->set_framebuffer_state(pipe, &fb);
1174	}
1175
	/* TODO: the docs say that we should unbind conflicting resources (e.g. those bound for
	 * reading while we are also binding them for writing), but we currently do not.
	 * Hopefully nobody relies on this happening.
	 */
1179
1180	virtual void STDMETHODCALLTYPE OMSetRenderTargets(
1181		unsigned count,
1182		ID3D11RenderTargetView *const *new_render_target_views,
1183		ID3D11DepthStencilView  *new_depth_stencil_view)
1184	{
1185		SYNCHRONIZED;
1186		if(!new_render_target_views)
1187			count = 0;
1188		if(count == num_render_target_views)
1189		{
1190			for(unsigned i = 0; i < count; ++i)
1191			{
1192				if(new_render_target_views[i] != render_target_views[i].p)
1193					goto changed;
1194			}
1195			return;
1196		}
1197changed:
1198		depth_stencil_view = new_depth_stencil_view;
1199		unsigned i;
1200		for(i = 0; i < count; ++i)
1201		{
1202			render_target_views[i] = new_render_target_views[i];
1203#if API >= 11
1204			om_unordered_access_views[i] = (ID3D11UnorderedAccessView*)NULL;
1205#endif
1206		}
1207		for(; i < num_render_target_views; ++i)
1208			render_target_views[i] = (ID3D11RenderTargetView*)NULL;
1209		num_render_target_views = count;
1210		set_framebuffer();
1211	}
1212
1213	virtual void STDMETHODCALLTYPE OMGetRenderTargets(
1214		unsigned count,
1215		ID3D11RenderTargetView **out_render_target_views,
1216		ID3D11DepthStencilView  **out_depth_stencil_view)
1217	{
1218		SYNCHRONIZED;
1219		if(out_render_target_views)
1220		{
1221			unsigned i;
1222			for(i = 0; i < std::min(num_render_target_views, count); ++i)
1223				out_render_target_views[i] = render_target_views[i].ref();
1224
1225			for(; i < count; ++i)
1226				out_render_target_views[i] = 0;
1227		}
1228
1229		if(out_depth_stencil_view)
1230			*out_depth_stencil_view = depth_stencil_view.ref();
1231	}
1232
1233#if API >= 11
1234	/* TODO: what is this supposed to do _exactly_? are we doing the right thing? */
1235	virtual void STDMETHODCALLTYPE OMSetRenderTargetsAndUnorderedAccessViews(
1236		unsigned rtv_count,
1237		ID3D11RenderTargetView *const *new_render_target_views,
1238		ID3D11DepthStencilView  *new_depth_stencil_view,
1239		unsigned uav_start,
1240		unsigned uav_count,
1241		ID3D11UnorderedAccessView *const *new_unordered_access_views,
1242		const unsigned *new_uav_initial_counts)
1243	{
1244		SYNCHRONIZED;
1245		if(rtv_count != D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL)
1246			OMSetRenderTargets(rtv_count, new_render_target_views, new_depth_stencil_view);
1247
1248		if(uav_count != D3D11_KEEP_UNORDERED_ACCESS_VIEWS)
1249		{
1250			for(unsigned i = 0; i < uav_count; ++i)
1251			{
1252				om_unordered_access_views[uav_start + i] = new_unordered_access_views[i];
1253				render_target_views[uav_start + i] = (ID3D11RenderTargetView*)0;
1254			}
1255		}
1256	}
1257
1258	virtual void STDMETHODCALLTYPE OMGetRenderTargetsAndUnorderedAccessViews(
1259		unsigned rtv_count,
1260		ID3D11RenderTargetView **out_render_target_views,
1261		ID3D11DepthStencilView  **out_depth_stencil_view,
1262		unsigned uav_start,
1263		unsigned uav_count,
1264		ID3D11UnorderedAccessView **out_unordered_access_views)
1265	{
1266		SYNCHRONIZED;
1267		if(out_render_target_views)
1268			OMGetRenderTargets(rtv_count, out_render_target_views, out_depth_stencil_view);
1269
1270		if(out_unordered_access_views)
1271		{
1272			for(unsigned i = 0; i < uav_count; ++i)
1273				out_unordered_access_views[i] = om_unordered_access_views[uav_start + i].ref();
1274		}
1275	}
1276#endif
1277
1278	virtual void STDMETHODCALLTYPE SOSetTargets(
1279		unsigned count,
1280		ID3D11Buffer *const *new_so_targets,
1281		const unsigned *new_offsets)
1282	{
1283		SYNCHRONIZED;
1284		unsigned i;
1285		if(!new_so_targets)
1286			count = 0;
1287		bool changed = false;
1288		for(i = 0; i < count; ++i)
1289		{
1290			ID3D11Buffer* buffer = new_so_targets[i];
1291			if(buffer != so_targets[i].p || new_offsets[i] != so_offsets[i])
1292			{
1293				so_buffers[i] = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
1294				so_targets[i] = buffer;
1295				so_offsets[i] = new_offsets[i];
1296				changed = true;
1297			}
1298		}
1299		for(; i < D3D11_SO_BUFFER_SLOT_COUNT; ++i)
1300		{
1301			if(so_targets[i].p || so_offsets[i])
1302			{
1303				changed = true;
1304				so_targets[i] = (ID3D11Buffer*)0;
1305				so_offsets[i] = 0;
1306			}
1307		}
1308		num_so_targets = count;
1309
1310		if(changed && caps.so)
1311			pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1312	}
1313
1314	virtual void STDMETHODCALLTYPE SOGetTargets(
1315		unsigned count,
1316		ID3D11Buffer **out_so_targets
1317#if API < 11
1318		, UINT *out_offsets
1319#endif
1320		)
1321	{
1322		SYNCHRONIZED;
1323		for(unsigned i = 0; i < count; ++i)
1324		{
1325			out_so_targets[i] = so_targets[i].ref();
1326#if API < 11
1327			out_offsets[i] = so_offsets[i];
1328#endif
1329		}
1330	}
1331
1332	virtual void STDMETHODCALLTYPE Begin(
1333		ID3D11Asynchronous *async)
1334	{
1335		SYNCHRONIZED;
1336		if(caps.queries)
1337			pipe->begin_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query);
1338	}
1339
1340	virtual void STDMETHODCALLTYPE End(
1341		ID3D11Asynchronous *async)
1342	{
1343		SYNCHRONIZED;
1344		if(caps.queries)
1345			pipe->end_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query);
1346	}
1347
1348	virtual HRESULT STDMETHODCALLTYPE GetData(
1349		ID3D11Asynchronous *iasync,
1350		void *out_data,
1351		unsigned data_size,
1352		unsigned get_data_flags)
1353	{
1354		SYNCHRONIZED;
1355		if(!caps.queries)
1356			return E_NOTIMPL;
1357
1358		GalliumD3D11Asynchronous<>* async = (GalliumD3D11Asynchronous<>*)iasync;
1359		void* tmp_data = alloca(async->data_size);
1360		boolean ret = pipe->get_query_result(pipe, async->query, !(get_data_flags & D3D11_ASYNC_GETDATA_DONOTFLUSH), tmp_data);
1361		if(out_data)
1362			memcpy(out_data, tmp_data, std::min(async->data_size, data_size));
1363		return ret ? S_OK : S_FALSE;
1364	}
1365
1366	void set_render_condition()
1367	{
1368		if(caps.render_condition)
1369		{
1370			if(!render_predicate)
1371				pipe->render_condition(pipe, 0, 0);
1372			else
1373			{
1374				GalliumD3D11Predicate* predicate = (GalliumD3D11Predicate*)render_predicate.p;
1375				if(!render_predicate_value && predicate->desc.Query == D3D11_QUERY_OCCLUSION_PREDICATE)
1376				{
1377					unsigned mode = (predicate->desc.MiscFlags & D3D11_QUERY_MISC_PREDICATEHINT) ? PIPE_RENDER_COND_NO_WAIT : PIPE_RENDER_COND_WAIT;
1378					pipe->render_condition(pipe, predicate->query, mode);
1379				}
1380				else
1381				{
1382					/* TODO: add inverted predication to Gallium*/
1383					pipe->render_condition(pipe, 0, 0);
1384				}
1385			}
1386		}
1387	}
1388
1389	virtual void STDMETHODCALLTYPE SetPredication(
1390		ID3D11Predicate *new_predicate,
1391		BOOL new_predicate_value)
1392	{
1393		SYNCHRONIZED;
1394		if(render_predicate.p != new_predicate || render_predicate_value != new_predicate_value)
1395		{
1396			render_predicate = new_predicate;
1397			render_predicate_value = new_predicate_value;
1398			set_render_condition();
1399		}
1400	}
1401
	/* Returns the cached rendering predicate (via ref()) and its value.
	 * Each output is optional: a NULL output pointer is simply skipped.
	 */
	virtual void STDMETHODCALLTYPE GetPredication(
		ID3D11Predicate **out_predicate,
		BOOL *out_predicate_value)
	{
		SYNCHRONIZED;
		if(out_predicate)
			*out_predicate = render_predicate.ref();
		if(out_predicate_value)
			*out_predicate_value = render_predicate_value;
	}
1412
1413	static pipe_subresource d3d11_to_pipe_subresource(struct pipe_resource* resource, unsigned subresource)
1414	{
1415		pipe_subresource sr;
1416		if(subresource <= resource->last_level)
1417		{
1418			sr.level = subresource;
1419			sr.face = 0;
1420		}
1421		else
1422		{
1423			unsigned levels = resource->last_level + 1;
1424			sr.level = subresource % levels;
1425			sr.face = subresource / levels;
1426		}
1427		return sr;
1428	}
1429
1430	virtual HRESULT STDMETHODCALLTYPE Map(
1431		ID3D11Resource *iresource,
1432		unsigned subresource,
1433		D3D11_MAP map_type,
1434		unsigned map_flags,
1435		D3D11_MAPPED_SUBRESOURCE *mapped_resource)
1436	{
1437		SYNCHRONIZED;
1438		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1439		if(resource->transfers.count(subresource))
1440			return E_FAIL;
1441		pipe_subresource sr = d3d11_to_pipe_subresource(resource->resource, subresource);
1442		pipe_box box;
1443		d3d11_to_pipe_box(resource->resource, sr.level, 0);
1444		unsigned usage = 0;
1445		if(map_type == D3D11_MAP_READ)
1446			usage = PIPE_TRANSFER_READ;
1447		else if(map_type == D3D11_MAP_WRITE)
1448			usage = PIPE_TRANSFER_WRITE;
1449		else if(map_type == D3D11_MAP_READ_WRITE)
1450			usage = PIPE_TRANSFER_READ_WRITE;
1451		else if(map_type == D3D11_MAP_WRITE_DISCARD)
1452			usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD;
1453		else if(map_type == D3D11_MAP_WRITE_NO_OVERWRITE)
1454			usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_NOOVERWRITE;
1455		else
1456			return E_INVALIDARG;
1457		if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT)
1458			usage |= PIPE_TRANSFER_DONTBLOCK;
1459		struct pipe_transfer* transfer = pipe->get_transfer(pipe, resource->resource, sr, usage, &box);
1460		if(!transfer) {
1461			if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT)
1462				return DXGI_ERROR_WAS_STILL_DRAWING;
1463			else
1464				return E_FAIL;
1465		}
1466		resource->transfers[subresource] = transfer;
1467		pipe->transfer_map(pipe, transfer);
1468		mapped_resource->pData = transfer->data;
1469		mapped_resource->RowPitch = transfer->stride;
1470		mapped_resource->DepthPitch = transfer->slice_stride;
1471		return S_OK;
1472	}
1473
1474	virtual void STDMETHODCALLTYPE Unmap(
1475		ID3D11Resource *iresource,
1476		unsigned subresource)
1477	{
1478		SYNCHRONIZED;
1479		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1480		std::unordered_map<unsigned, pipe_transfer*>::iterator i = resource->transfers.find(subresource);
1481		if(i != resource->transfers.end())
1482		{
1483			pipe->transfer_unmap(pipe, i->second);
1484			pipe->transfer_destroy(pipe, i->second);
1485			resource->transfers.erase(i);
1486		}
1487	}
1488
1489	virtual void STDMETHODCALLTYPE CopySubresourceRegion(
1490		ID3D11Resource *dst_resource,
1491		unsigned dst_subresource,
1492		unsigned dst_x,
1493		unsigned dst_y,
1494		unsigned dst_z,
1495		ID3D11Resource *src_resource,
1496		unsigned src_subresource,
1497		const D3D11_BOX *src_box)
1498	{
1499		SYNCHRONIZED;
1500		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1501		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1502		pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, dst_subresource);
1503		pipe_subresource subsrc = d3d11_to_pipe_subresource(src->resource, src_subresource);
1504		pipe_box box = d3d11_to_pipe_box(src->resource, subsrc.level, src_box);
1505		for(unsigned i = 0; i < box.depth; ++i)
1506		{
1507			pipe->resource_copy_region(pipe,
1508				dst->resource, subdst, dst_x, dst_y, dst_z + i,
1509				src->resource, subsrc, box.x, box.y, box.z + i,
1510				box.width, box.height);
1511		}
1512	}
1513
1514	virtual void STDMETHODCALLTYPE CopyResource(
1515		ID3D11Resource *dst_resource,
1516		ID3D11Resource *src_resource)
1517	{
1518		SYNCHRONIZED;
1519		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1520		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1521		pipe_subresource sr;
1522		unsigned faces = dst->resource->target == PIPE_TEXTURE_CUBE ? 6 : 1;
1523
1524		for(sr.face = 0; sr.face < faces; ++sr.face)
1525		{
1526			for(sr.level = 0; sr.level <= dst->resource->last_level; ++sr.level)
1527			{
1528				unsigned w = u_minify(dst->resource->width0, sr.level);
1529				unsigned h = u_minify(dst->resource->height0, sr.level);
1530				unsigned d = u_minify(dst->resource->depth0, sr.level);
1531				for(unsigned i = 0; i < d; ++i)
1532				{
1533					pipe->resource_copy_region(pipe,
1534							dst->resource, sr, 0, 0, i,
1535							src->resource, sr, 0, 0, i,
1536							w, h);
1537				}
1538			}
1539		}
1540	}
1541
1542	virtual void STDMETHODCALLTYPE UpdateSubresource(
1543		ID3D11Resource *dst_resource,
1544		unsigned dst_subresource,
1545		const D3D11_BOX *pDstBox,
1546		const void *pSrcData,
1547		unsigned src_row_pitch,
1548		unsigned src_depth_pitch)
1549	{
1550		SYNCHRONIZED;
1551		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1552		pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, dst_subresource);
1553		pipe_box box = d3d11_to_pipe_box(dst->resource, subdst.level, pDstBox);
1554		pipe->transfer_inline_write(pipe, dst->resource, subdst, PIPE_TRANSFER_WRITE, &box, pSrcData, src_row_pitch, src_depth_pitch);
1555	}
1556
1557#if API >= 11
	/* Not implemented: the body only contains SYNCHRONIZED and all arguments are ignored. */
	virtual void STDMETHODCALLTYPE CopyStructureCount(
		ID3D11Buffer *dst_buffer,
		unsigned dst_aligned_byte_offset,
		ID3D11UnorderedAccessView *src_view)
	{
		SYNCHRONIZED;
	}
1565#endif
1566
1567	virtual void STDMETHODCALLTYPE ClearRenderTargetView(
1568		ID3D11RenderTargetView *render_target_view,
1569		const float color[4])
1570	{
1571		SYNCHRONIZED;
1572		GalliumD3D11RenderTargetView* view = ((GalliumD3D11RenderTargetView*)render_target_view);
1573		pipe->clear_render_target(pipe, view->object, color, 0, 0, view->object->width, view->object->height);
1574	}
1575
1576	virtual void STDMETHODCALLTYPE ClearDepthStencilView(
1577		ID3D11DepthStencilView  *depth_stencil_view,
1578		unsigned clear_flags,
1579		float depth,
1580		UINT8 stencil)
1581	{
1582		SYNCHRONIZED;
1583		GalliumD3D11DepthStencilView* view = ((GalliumD3D11DepthStencilView*)depth_stencil_view);
1584		unsigned flags = 0;
1585		if(clear_flags & D3D11_CLEAR_DEPTH)
1586			flags |= PIPE_CLEAR_DEPTH;
1587		if(clear_flags & D3D11_CLEAR_STENCIL)
1588			flags |= PIPE_CLEAR_STENCIL;
1589		pipe->clear_depth_stencil(pipe, view->object, flags, depth, stencil, 0, 0, view->object->width, view->object->height);
1590	}
1591
1592#if API >= 11
	/* Not implemented: the body only contains SYNCHRONIZED and all arguments are ignored. */
	virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewUint(
		ID3D11UnorderedAccessView *unordered_access_view,
		const unsigned values[4])
	{
		SYNCHRONIZED;
	}
1599
	/* Not implemented: the body only contains SYNCHRONIZED and all arguments are ignored. */
	virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewFloat(
			ID3D11UnorderedAccessView *unordered_access_view,
			const float values[4])
	{
		SYNCHRONIZED;
	}
1606#endif
1607
	/* Re-sends to the pipe the subset of state listed below, without taking
	 * the lock (callers do that).
	 * NOTE(review): judging by the name, this is the state a blit-style helper
	 * such as util_gen_mipmap may clobber - confirm.
	 */
	void restore_gallium_state_blit_only()
	{
		// re-bind the CSOs, falling back to the defaults where nothing is bound
		pipe->bind_blend_state(pipe, blend_state.p ? blend_state.p->object : default_blend);
		pipe->bind_depth_stencil_alpha_state(pipe, depth_stencil_state.p ? depth_stencil_state.p->object : default_depth_stencil);
		pipe->bind_rasterizer_state(pipe, rasterizer_state.p ? rasterizer_state.p->object : default_rasterizer);
		pipe->bind_vertex_elements_state(pipe, input_layout.p ? input_layout.p->object : default_input_layout);
		pipe->bind_fs_state(pipe, shaders[D3D11_STAGE_PS].p ? shaders[D3D11_STAGE_PS].p->object : default_shaders[PIPE_SHADER_FRAGMENT]);
		pipe->bind_vs_state(pipe, shaders[D3D11_STAGE_VS].p ? shaders[D3D11_STAGE_VS].p->object : default_shaders[PIPE_SHADER_VERTEX]);
		if(caps.gs)
			pipe->bind_gs_state(pipe, shaders[D3D11_STAGE_GS].p ? shaders[D3D11_STAGE_GS].p->object : default_shaders[PIPE_SHADER_GEOMETRY]);
		// re-send the non-CSO state (note: the scissor is not re-sent here)
		set_framebuffer();
		set_viewport();
		set_clip();
		set_render_condition();
		// TODO: restore stream output

		// flag vertex buffers and the pixel shader's samplers and views as needing an update
		update_flags |= UPDATE_VERTEX_BUFFERS | (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_PS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_PS));
	}
1626
	/* Public entry point: runs restore_gallium_state_blit_only() after SYNCHRONIZED. */
	virtual void STDMETHODCALLTYPE RestoreGalliumStateBlitOnly()
	{
		SYNCHRONIZED;
		restore_gallium_state_blit_only();
	}
1632
	/* Generates mipmaps for the texture behind a shader resource view using
	 * util_gen_mipmap with linear filtering, up to the texture's last_level.
	 * Geometry shader, stream output state and render condition are unbound
	 * first; afterwards state is put back with restore_gallium_state_blit_only().
	 */
	virtual void STDMETHODCALLTYPE GenerateMips(
		ID3D11ShaderResourceView *shader_resource_view)
	{
		SYNCHRONIZED;

		GalliumD3D11ShaderResourceView* view = (GalliumD3D11ShaderResourceView*)shader_resource_view;
		// unbind state that restore_gallium_state_blit_only() re-sends afterwards
		// (stream output is not restored there yet, see its TODO)
		if(caps.gs)
			pipe->bind_gs_state(pipe, 0);
		if(caps.so)
			pipe->bind_stream_output_state(pipe, 0);
		// note: checks the pipe entry point directly rather than caps.render_condition
		if(pipe->render_condition)
			pipe->render_condition(pipe, 0, 0);
		util_gen_mipmap(gen_mipmap, view->object, 0, 0, view->object->texture->last_level, PIPE_TEX_FILTER_LINEAR);
		restore_gallium_state_blit_only();
	}
1648
1649	virtual void STDMETHODCALLTYPE RestoreGalliumState()
1650	{
1651		SYNCHRONIZED;
1652		restore_gallium_state_blit_only();
1653
1654		set_index_buffer();
1655		set_stencil_ref();
1656		pipe->set_blend_color(pipe, (struct pipe_blend_color*)blend_color);
1657		pipe->set_sample_mask(pipe, sample_mask);
1658
1659		for(unsigned s = 0; s < 3; ++s)
1660		{
1661			unsigned num = std::min(caps.constant_buffers[s], (unsigned)D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
1662			for(unsigned i = 0; i < num; ++i)
1663				pipe->set_constant_buffer(pipe, s, i, constant_buffers[s][i].p ? constant_buffers[s][i].p->resource : 0);
1664		}
1665
1666		if(caps.so)
1667			pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1668
1669		update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_VS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_VS));
1670		update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_GS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_GS));
1671
1672		set_scissor();
1673	}
1674
1675#if API >= 11
1676	/* TODO: hack SRVs or sampler states to handle this, or add to Gallium */
1677	virtual void STDMETHODCALLTYPE SetResourceMinLOD(
1678		ID3D11Resource *iresource,
1679		float min_lod)
1680	{
1681		SYNCHRONIZED;
1682		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1683		if(resource->min_lod != min_lod)
1684		{
1685			// TODO: actually do anything?
1686			resource->min_lod = min_lod;
1687		}
1688	}
1689
1690	virtual float STDMETHODCALLTYPE GetResourceMinLOD(
1691		ID3D11Resource *iresource)
1692	{
1693		SYNCHRONIZED;
1694		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1695		return resource->min_lod;
1696	}
1697#endif
1698
1699	virtual void STDMETHODCALLTYPE ResolveSubresource(
1700		ID3D11Resource *dst_resource,
1701		unsigned dst_subresource,
1702		ID3D11Resource *src_resource,
1703		unsigned src_subresource,
1704		DXGI_FORMAT format)
1705	{
1706		SYNCHRONIZED;
1707		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1708		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1709		pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, dst_subresource);
1710		pipe_subresource subsrc = d3d11_to_pipe_subresource(src->resource, src_subresource);
1711		pipe->resource_resolve(pipe, dst->resource, subdst, src->resource, subsrc);
1712	}
1713
1714#if API >= 11
	/* Not implemented: the body only contains SYNCHRONIZED and all arguments are ignored. */
	virtual void STDMETHODCALLTYPE ExecuteCommandList(
		ID3D11CommandList *command_list,
		BOOL restore_context_state)
	{
		SYNCHRONIZED;
	}
1721
	/* Not implemented: always returns E_NOTIMPL and never writes to out_command_list. */
	virtual HRESULT STDMETHODCALLTYPE FinishCommandList(
		BOOL restore_deferred_context_state,
		ID3D11CommandList **out_command_list)
	{
		SYNCHRONIZED;
		return E_NOTIMPL;
	}
1729#endif
1730
1731	virtual void STDMETHODCALLTYPE ClearState(void)
1732	{
1733		/* we don't take a lock here because we would deadlock otherwise
1734		 * TODO: this is probably incorrect, because ClearState should likely be atomic.
1735		 * However, I can't think of any correct usage that would be affected by this
1736		 * being non-atomic, and making this atomic is quite expensive and complicates
1737		 * the code
1738		 */
1739
1740		// we qualify all calls so that we avoid virtual dispatch and might get them inlined
1741		// TODO: make sure all this gets inlined, which might require more compiler flags
1742		// TODO: optimize this
1743#if API >= 11
1744		GalliumD3D11DeviceContext::PSSetShader(0, 0, 0);
1745		GalliumD3D11DeviceContext::GSSetShader(0, 0, 0);
1746		GalliumD3D11DeviceContext::VSSetShader(0, 0, 0);
1747		GalliumD3D11DeviceContext::HSSetShader(0, 0, 0);
1748		GalliumD3D11DeviceContext::DSSetShader(0, 0, 0);
1749		GalliumD3D11DeviceContext::CSSetShader(0, 0, 0);
1750#else
1751		GalliumD3D11DeviceContext::PSSetShader(0);
1752		GalliumD3D11DeviceContext::GSSetShader(0);
1753		GalliumD3D11DeviceContext::VSSetShader(0);
1754#endif
1755
1756		GalliumD3D11DeviceContext::IASetInputLayout(0);
1757		GalliumD3D11DeviceContext::IASetIndexBuffer(0, DXGI_FORMAT_UNKNOWN, 0);
1758		GalliumD3D11DeviceContext::RSSetState(0);
1759		GalliumD3D11DeviceContext::OMSetDepthStencilState(0, 0);
1760		GalliumD3D11DeviceContext::OMSetBlendState(0, (float*)zero_data, ~0);
1761		GalliumD3D11DeviceContext::SetPredication(0, 0);
1762		GalliumD3D11DeviceContext::IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_UNDEFINED);
1763
1764		GalliumD3D11DeviceContext::PSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1765		GalliumD3D11DeviceContext::GSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1766		GalliumD3D11DeviceContext::VSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1767#if API >= 11
1768		GalliumD3D11DeviceContext::HSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1769		GalliumD3D11DeviceContext::DSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1770		GalliumD3D11DeviceContext::CSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1771#endif
1772
1773		GalliumD3D11DeviceContext::IASetVertexBuffers(0, num_vertex_buffers, (ID3D11Buffer**)zero_data, (unsigned*)zero_data, (unsigned*)zero_data);
1774#if API >= 11
1775		GalliumD3D11DeviceContext::OMSetRenderTargetsAndUnorderedAccessViews(0, 0, 0 , 0, 0, 0, 0);
1776#else
1777		GalliumD3D11DeviceContext::OMSetRenderTargets(0, 0, 0 );
1778#endif
1779		GalliumD3D11DeviceContext::SOSetTargets(0, 0, 0);
1780
1781		GalliumD3D11DeviceContext::PSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11ShaderResourceView**)zero_data);
1782		GalliumD3D11DeviceContext::GSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11ShaderResourceView**)zero_data);
1783		GalliumD3D11DeviceContext::VSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11ShaderResourceView**)zero_data);
1784#if API >= 11
1785		GalliumD3D11DeviceContext::HSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11ShaderResourceView**)zero_data);
1786		GalliumD3D11DeviceContext::DSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11ShaderResourceView**)zero_data);
1787		GalliumD3D11DeviceContext::CSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11ShaderResourceView**)zero_data);
1788#endif
1789
1790		GalliumD3D11DeviceContext::PSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11SamplerState**)zero_data);
1791		GalliumD3D11DeviceContext::GSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11SamplerState**)zero_data);
1792		GalliumD3D11DeviceContext::VSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11SamplerState**)zero_data);
1793#if API >= 11
1794		GalliumD3D11DeviceContext::HSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11SamplerState**)zero_data);
1795		GalliumD3D11DeviceContext::DSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11SamplerState**)zero_data);
1796		GalliumD3D11DeviceContext::CSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11SamplerState**)zero_data);
1797#endif
1798
1799		GalliumD3D11DeviceContext::RSSetViewports(0, 0);
1800		GalliumD3D11DeviceContext::RSSetScissorRects(0, 0);
1801	}
1802
	/* Flushes the pipe with PIPE_FLUSH_FRAME; no fence is requested (last argument is 0). */
	virtual void STDMETHODCALLTYPE Flush(void)
	{
		SYNCHRONIZED;
		pipe->flush(pipe, PIPE_FLUSH_FRAME, 0);
	}
1808
1809	/* In Direct3D 10, if the reference count of an object drops to 0, it is automatically
1810	 * cleanly unbound from the pipeline.
1811	 * In Direct3D 11, the pipeline holds a reference.
1812	 *
1813	 * Note that instead of always scanning the pipeline on destruction, we could
1814	 * maintain the internal reference count on DirectX 10 and use it to check if an
1815	 * object is still bound.
1816	 * Presumably, on average, scanning is faster if the application is well written.
1817	 */
1818#if API < 11
/* Generates Unbind<name>(ID3D11<name>* state): if state is the object held in
 * `member`, the raw pointer member.p is cleared directly and the pipe is
 * re-bound to default_<def> through pipe->bind_<gallium>_state.
 * The pointers are compared as void* because member.p need not have the
 * interface type.
 * Note that the macro is left defined after use in this chunk.
 */
#define IMPLEMENT_SIMPLE_UNBIND(name, member, gallium, def) \
	void Unbind##name(ID3D11##name* state) \
	{ \
		SYNCHRONIZED; \
		if((void*)state == (void*)member.p) \
		{ \
			member.p = 0; \
			pipe->bind_##gallium##_state(pipe, default_##def); \
		} \
	}
	// fixed-function state objects
	IMPLEMENT_SIMPLE_UNBIND(BlendState, blend_state, blend, blend)
	IMPLEMENT_SIMPLE_UNBIND(RasterizerState, rasterizer_state, rasterizer, rasterizer)
	IMPLEMENT_SIMPLE_UNBIND(DepthStencilState, depth_stencil_state, depth_stencil_alpha, depth_stencil)
	IMPLEMENT_SIMPLE_UNBIND(InputLayout, input_layout, vertex_elements, input_layout)
	// shaders: `def` expands to default_shaders[D3D11_STAGE_*]
	IMPLEMENT_SIMPLE_UNBIND(PixelShader, shaders[D3D11_STAGE_PS], fs, shaders[D3D11_STAGE_PS])
	IMPLEMENT_SIMPLE_UNBIND(VertexShader, shaders[D3D11_STAGE_VS], vs, shaders[D3D11_STAGE_VS])
	IMPLEMENT_SIMPLE_UNBIND(GeometryShader, shaders[D3D11_STAGE_GS], gs, shaders[D3D11_STAGE_GS])
1836
1837	void UnbindPredicate(ID3D11Predicate* predicate)
1838	{
1839		SYNCHRONIZED;
1840		if(predicate == render_predicate)
1841		{
1842			render_predicate.p = NULL;
1843			render_predicate_value = 0;
1844			pipe->render_condition(pipe, 0, 0);
1845		}
1846	}
1847
1848	void UnbindSamplerState(ID3D11SamplerState* state)
1849	{
1850		SYNCHRONIZED;
1851		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1852		{
1853			for(unsigned i = 0; i < num_samplers[s]; ++i)
1854			{
1855				if(samplers[s][i] == state)
1856				{
1857					samplers[s][i].p = NULL;
1858					sampler_csos[s].v[i] = NULL;
1859					update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s));
1860				}
1861			}
1862		}
1863	}
1864
1865	void UnbindBuffer(ID3D11Buffer* buffer)
1866	{
1867		SYNCHRONIZED;
1868		if(buffer == index_buffer)
1869		{
1870			index_buffer.p = 0;
1871			index_format = DXGI_FORMAT_UNKNOWN;
1872			index_offset = 0;
1873			struct pipe_index_buffer ib;
1874			memset(&ib, 0, sizeof(ib));
1875			pipe->set_index_buffer(pipe, &ib);
1876		}
1877
1878		for(unsigned i = 0; i < num_vertex_buffers; ++i)
1879		{
1880			if(buffer == input_buffers[i])
1881			{
1882				input_buffers[i].p = 0;
1883				memset(&vertex_buffers[num_vertex_buffers], 0, sizeof(vertex_buffers[num_vertex_buffers]));
1884				update_flags |= UPDATE_VERTEX_BUFFERS;
1885			}
1886		}
1887
1888		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1889		{
1890			for(unsigned i = 0; i < sizeof(constant_buffers) / sizeof(constant_buffers[0]); ++i)
1891			{
1892				if(constant_buffers[s][i] == buffer)
1893				{
1894					constant_buffers[s][i] = (ID3D10Buffer*)NULL;
1895					pipe->set_constant_buffer(pipe, s, i, NULL);
1896				}
1897			}
1898		}
1899	}
1900
1901	void UnbindDepthStencilView(ID3D11DepthStencilView * view)
1902	{
1903		SYNCHRONIZED;
1904		if(view == depth_stencil_view)
1905		{
1906			depth_stencil_view.p = NULL;
1907			set_framebuffer();
1908		}
1909	}
1910
1911	void UnbindRenderTargetView(ID3D11RenderTargetView* view)
1912	{
1913		SYNCHRONIZED;
1914		bool any_bound = false;
1915		for(unsigned i = 0; i < num_render_target_views; ++i)
1916		{
1917			if(render_target_views[i] == view)
1918			{
1919				render_target_views[i].p = NULL;
1920				any_bound = true;
1921			}
1922		}
1923		if(any_bound)
1924			set_framebuffer();
1925	}
1926
1927	void UnbindShaderResourceView(ID3D11ShaderResourceView* view)
1928	{
1929		SYNCHRONIZED;
1930		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1931		{
1932			for(unsigned i = 0; i < num_shader_resource_views[s]; ++i)
1933			{
1934				if(shader_resource_views[s][i] == view)
1935				{
1936					shader_resource_views[s][i].p = NULL;
1937					sampler_views[s][i] = NULL;
1938					update_flags |= (1 << (UPDATE_VIEWS_SHIFT + s));
1939				}
1940			}
1941		}
1942	}
1943#endif
1944
1945#undef SYNCHRONIZED
1946};
1947
1948#if API >= 11
1949/* This approach serves two purposes.
1950 * First, we don't want to do an atomic operation to manipulate the reference
1951 * count every time something is bound/unbound to the pipeline, since they are
1952 * expensive.
1953 * Fortunately, the immediate context can only be used by a single thread, so
1954 * we don't have to use them, as long as a separate reference count is used
1955 * (see dual_refcnt_t).
1956 *
1957 * Second, we want to avoid the Device -> DeviceContext -> bound DeviceChild -> Device
1958 * garbage cycle.
1959 * To avoid it, DeviceChild doesn't hold a reference to Device as usual, but adds
1960 * one for each external reference count, while internal nonatomic_add_ref doesn't
1961 * add any.
1962 *
 * Note that ideally we would like to eliminate the non-atomic op too, but this is more
1964 * complicated, since we would either need to use garbage collection and give up
1965 * deterministic destruction (especially bad for large textures), or scan the whole
1966 * pipeline state every time the reference count of object drops to 0, which risks
1967 * pathological slowdowns.
1968 *
1969 * Since this microoptimization should matter relatively little, let's avoid it for now.
1970 *
 * Note that deferred contexts don't use this, since as a whole, they must be thread-safe.
1972 * Eliminating the atomic ops for deferred contexts seems substantially harder.
1973 * This might be a problem if they are used in a one-shot multithreaded rendering
1974 * fashion, where SMP cacheline bouncing on the reference count may be visible.
1975 *
1976 * The idea would be to attach a structure of reference counts indexed by deferred
1977 * context id to each object. Ideally, this should be organized like ext2 block pointers.
1978 *
1979 * Every deferred context would get a reference count in its own cacheline.
1980 * The external count is protected by a lock bit, and there is also a "lock bit" in each
1981 * internal count.
1982 *
1983 * When the external count has to be dropped to 0, the lock bit is taken and all internal
1984 * reference counts are scanned, taking a count of them. A flag would also be set on them.
1985 * Deferred context manipulation would notice the flag, and update the count.
1986 * Once the count goes to zero, the object is freed.
1987 *
 * The problem with this is that if the external reference count ping-pongs between
1989 * zero and non-zero, the scans will take a lot of time.
1990 *
1991 * The idea to solve this is to compute the scans in a binary-tree like fashion, where
1992 * each binary tree node would have a "determined bit", which would be invalidated
1993 * by manipulations.
1994 *
1995 * However, all this complexity might actually be a loss in most cases, so let's just
1996 * stick to a single atomic refcnt for now.
1997 *
1998 * Also, we don't even support deferred contexts yet, so this can wait.
1999 */
2000struct nonatomic_device_child_ptr_traits
2001{
2002	static void add_ref(void* p)
2003	{
2004		if(p)
2005			((GalliumD3D11DeviceChild<>*)p)->nonatomic_add_ref();
2006	}
2007
2008	static void release(void* p)
2009	{
2010		if(p)
2011			((GalliumD3D11DeviceChild<>*)p)->nonatomic_release();
2012	}
2013};
2014
2015struct GalliumD3D11ImmediateDeviceContext
2016	: public GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>
2017{
2018	GalliumD3D11ImmediateDeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, unsigned context_flags = 0)
2019	: GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>(device, pipe, context_flags)
2020	{
2021		// not necessary, but tests that the API at least basically works
2022		ClearState();
2023	}
2024
2025	/* we do this since otherwise we would have a garbage cycle between this and the device */
2026	virtual ULONG STDMETHODCALLTYPE AddRef()
2027	{
2028		return this->device->AddRef();
2029	}
2030
2031	virtual ULONG STDMETHODCALLTYPE Release()
2032	{
2033		return this->device->Release();
2034	}
2035
2036	virtual D3D11_DEVICE_CONTEXT_TYPE STDMETHODCALLTYPE GetType()
2037	{
2038		return D3D11_DEVICE_CONTEXT_IMMEDIATE;
2039	}
2040};
2041
2042static ID3D11DeviceContext* GalliumD3D11ImmediateDeviceContext_Create(GalliumD3D11Screen* device, struct pipe_context* pipe, bool owns_pipe)
2043{
2044	return new GalliumD3D11ImmediateDeviceContext(device, pipe, owns_pipe);
2045}
2046
2047static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumState(ID3D11DeviceContext* context)
2048{
2049	((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumState();
2050}
2051
2052static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumStateBlitOnly(ID3D11DeviceContext* context)
2053{
2054	((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumStateBlitOnly();
2055}
2056
2057static void GalliumD3D11ImmediateDeviceContext_Destroy(ID3D11DeviceContext* context)
2058{
2059	delete (GalliumD3D11ImmediateDeviceContext*)context;
2060}
2061#endif
2062