d3d11_context.h revision 92617aeac109481258f0c3863d09c1b8903d438b
1/**************************************************************************
2 *
3 * Copyright 2010 Luca Barbieri
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
/* All-null pointer array used to unbind state; it needs 128 entries because shader resources have that many slots. */
28static const void* zero_data[128];
29
30#define UPDATE_VIEWS_SHIFT (D3D11_STAGES * 0)
31#define UPDATE_SAMPLERS_SHIFT (D3D11_STAGES * 1)
32#define UPDATE_VERTEX_BUFFERS (1 << (D3D11_STAGES * 2))
33
34#if API >= 11
35template<typename PtrTraits>
36struct GalliumD3D11DeviceContext :
37	public GalliumD3D11DeviceChild<ID3D11DeviceContext>
38{
39#else
40template<bool threadsafe>
41struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe>
42{
43	typedef simple_ptr_traits PtrTraits;
44	typedef GalliumD3D10Device GalliumD3D10DeviceContext;
45#endif
46
47	refcnt_ptr<GalliumD3D11Shader<>, PtrTraits> shaders[D3D11_STAGES];
48	refcnt_ptr<GalliumD3D11InputLayout, PtrTraits> input_layout;
49	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> index_buffer;
50	refcnt_ptr<GalliumD3D11RasterizerState, PtrTraits> rasterizer_state;
51	refcnt_ptr<GalliumD3D11DepthStencilState, PtrTraits> depth_stencil_state;
52	refcnt_ptr<GalliumD3D11BlendState, PtrTraits> blend_state;
53	refcnt_ptr<GalliumD3D11DepthStencilView, PtrTraits> depth_stencil_view;
54	refcnt_ptr<GalliumD3D11Predicate, PtrTraits> render_predicate;
55
56	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> constant_buffers[D3D11_STAGES][D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT];
57	refcnt_ptr<GalliumD3D11ShaderResourceView, PtrTraits> shader_resource_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
58	refcnt_ptr<GalliumD3D11SamplerState, PtrTraits> samplers[D3D11_STAGES][D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
59	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> input_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
60	refcnt_ptr<GalliumD3D11RenderTargetView, PtrTraits> render_target_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
61	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> so_targets[D3D11_SO_BUFFER_SLOT_COUNT];
62
63#if API >= 11
64	refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> cs_unordered_access_views[D3D11_PS_CS_UAV_REGISTER_COUNT];
65	refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> om_unordered_access_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
66#endif
67
68	D3D11_VIEWPORT viewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
69	D3D11_RECT scissor_rects[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
70	unsigned so_offsets[D3D11_SO_BUFFER_SLOT_COUNT];
71	D3D11_PRIMITIVE_TOPOLOGY primitive_topology;
72	DXGI_FORMAT index_format;
73	unsigned index_offset;
74	BOOL render_predicate_value;
75	float blend_color[4];
76	unsigned sample_mask;
77	unsigned stencil_ref;
78	bool depth_clamp;
79
80	void* default_input_layout;
81	void* default_rasterizer;
82	void* default_depth_stencil;
83	void* default_blend;
84	void* default_sampler;
85	void* ld_sampler;
86	void * default_shaders[D3D11_STAGES];
87
88	// derived state
89	int primitive_mode;
90	struct pipe_vertex_buffer vertex_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
91	struct pipe_resource* so_buffers[D3D11_SO_BUFFER_SLOT_COUNT];
92	struct pipe_sampler_view* sampler_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
93	struct
94	{
95		void* ld; // accessed with a -1 index from v
96		void* v[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
97	} sampler_csos[D3D11_STAGES];
98	struct pipe_resource * buffers[D3D11_SO_BUFFER_SLOT_COUNT];
99	unsigned num_shader_resource_views[D3D11_STAGES];
100	unsigned num_samplers[D3D11_STAGES];
101	unsigned num_vertex_buffers;
102	unsigned num_render_target_views;
103	unsigned num_viewports;
104	unsigned num_scissor_rects;
105	unsigned num_so_targets;
106
107	struct pipe_context* pipe;
108	unsigned update_flags;
109
110	bool owns_pipe;
111	unsigned context_flags;
112
113	GalliumD3D11Caps caps;
114
115	cso_context* cso_ctx;
116	gen_mipmap_state* gen_mipmap;
117
118#if API >= 11
119#define SYNCHRONIZED do {} while(0)
120
121	GalliumD3D11DeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, bool owns_pipe, unsigned context_flags = 0)
122	: GalliumD3D11DeviceChild(device), pipe(pipe), owns_pipe(owns_pipe), context_flags(context_flags)
123	{
124		caps = device->screen_caps;
125		init_context();
126	}
127
128	~GalliumD3D11DeviceContext()
129	{
130		destroy_context();
131	}
132#else
133#define SYNCHRONIZED lock_t<maybe_mutex_t<threadsafe> > lock_(this->mutex)
134
135	GalliumD3D10Device(pipe_screen* screen, pipe_context* pipe, bool owns_pipe, unsigned creation_flags, IDXGIAdapter* adapter)
136	: GalliumD3D10ScreenImpl<threadsafe>(screen, pipe, owns_pipe, creation_flags, adapter), pipe(pipe), owns_pipe(owns_pipe), context_flags(0)
137	{
138		caps = this->screen_caps;
139		init_context();
140	}
141
142	~GalliumD3D10Device()
143	{
144		destroy_context();
145	}
146#endif
147
148	void init_context()
149	{
150		if(!pipe->begin_query)
151			caps.queries = false;
152		if(!pipe->render_condition)
153			caps.render_condition = false;
154		if(!pipe->bind_gs_state)
155		{
156			caps.gs = false;
157			caps.stages = 2;
158		}
159		if(!pipe->set_stream_output_buffers)
160			caps.so = false;
161
162		update_flags = 0;
163
164		// pipeline state
165		memset(viewports, 0, sizeof(viewports));
166		memset(scissor_rects, 0, sizeof(scissor_rects));
167		memset(so_offsets, 0, sizeof(so_offsets));
168		primitive_topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
169		index_format = DXGI_FORMAT_UNKNOWN;
170		index_offset = 0;
171		render_predicate_value = 0;
172		memset(blend_color, 0, sizeof(blend_color));
173		sample_mask = ~0;
174		stencil_ref = 0;
175		depth_clamp = 0;
176
177		// derived state
178		primitive_mode = 0;
179		memset(vertex_buffers, 0, sizeof(vertex_buffers));
180		memset(so_buffers, 0, sizeof(so_buffers));
181		memset(sampler_views, 0, sizeof(sampler_views));
182		memset(sampler_csos, 0, sizeof(sampler_csos));
183		memset(num_shader_resource_views, 0, sizeof(num_shader_resource_views));
184		memset(num_samplers, 0, sizeof(num_samplers));
185		num_vertex_buffers = 0;
186		num_render_target_views = 0;
187		num_viewports = 0;
188		num_scissor_rects = 0;
189		num_so_targets = 0;
190
191		default_input_layout = pipe->create_vertex_elements_state(pipe, 0, 0);
192
193		struct pipe_rasterizer_state rasterizerd;
194		memset(&rasterizerd, 0, sizeof(rasterizerd));
195		rasterizerd.gl_rasterization_rules = 1;
196		rasterizerd.cull_face = PIPE_FACE_BACK;
197		default_rasterizer = pipe->create_rasterizer_state(pipe, &rasterizerd);
198
199		struct pipe_depth_stencil_alpha_state depth_stencild;
200		memset(&depth_stencild, 0, sizeof(depth_stencild));
201		depth_stencild.depth.enabled = TRUE;
202		depth_stencild.depth.writemask = 1;
203		depth_stencild.depth.func = PIPE_FUNC_LESS;
204		default_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &depth_stencild);
205
206		struct pipe_blend_state blendd;
207		memset(&blendd, 0, sizeof(blendd));
208		blendd.rt[0].colormask = 0xf;
209		default_blend = pipe->create_blend_state(pipe, &blendd);
210
211		struct pipe_sampler_state samplerd;
212		memset(&samplerd, 0, sizeof(samplerd));
213		samplerd.normalized_coords = 1;
214		samplerd.min_img_filter = PIPE_TEX_FILTER_LINEAR;
215		samplerd.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
216		samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR;
217		samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
218		samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
219		samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
220		samplerd.border_color[0] = 1.0f;
221		samplerd.border_color[1] = 1.0f;
222		samplerd.border_color[2] = 1.0f;
223		samplerd.border_color[3] = 1.0f;
224		samplerd.min_lod = -FLT_MAX;
225		samplerd.max_lod = FLT_MAX;
226		samplerd.max_anisotropy = 1;
227		default_sampler = pipe->create_sampler_state(pipe, &samplerd);
228
229		memset(&samplerd, 0, sizeof(samplerd));
230		samplerd.normalized_coords = 0;
231		samplerd.min_img_filter = PIPE_TEX_FILTER_NEAREST;
232		samplerd.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
233		samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
234		samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
235		samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
236		samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
237		samplerd.min_lod = -FLT_MAX;
238		samplerd.max_lod = FLT_MAX;
239		samplerd.max_anisotropy = 1;
240		ld_sampler = pipe->create_sampler_state(pipe, &samplerd);
241
242		for(unsigned s = 0; s < D3D11_STAGES; ++s)
243		{
244			sampler_csos[s].ld = ld_sampler;
245			for(unsigned i = 0; i < D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; ++i)
246				sampler_csos[s].v[i] = default_sampler;
247		}
248
249		// TODO: should this really be empty shaders, or should they be all-passthrough?
250		memset(default_shaders, 0, sizeof(default_shaders));
251		struct ureg_program *ureg;
252		ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
253		ureg_END(ureg);
254		default_shaders[PIPE_SHADER_FRAGMENT] = ureg_create_shader_and_destroy(ureg, pipe);
255
256		ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
257		ureg_END(ureg);
258		default_shaders[PIPE_SHADER_VERTEX] = ureg_create_shader_and_destroy(ureg, pipe);
259
260		cso_ctx = cso_create_context(pipe);
261		gen_mipmap = util_create_gen_mipmap(pipe, cso_ctx);
262
263		RestoreGalliumState();
264	}
265
266	void destroy_context()
267	{
268		util_destroy_gen_mipmap(gen_mipmap);
269		cso_destroy_context(cso_ctx);
270		pipe->delete_vertex_elements_state(pipe, default_input_layout);
271		pipe->delete_rasterizer_state(pipe, default_rasterizer);
272		pipe->delete_depth_stencil_alpha_state(pipe, default_depth_stencil);
273		pipe->delete_blend_state(pipe, default_blend);
274		pipe->delete_sampler_state(pipe, default_sampler);
275		pipe->delete_sampler_state(pipe, ld_sampler);
276		pipe->delete_fs_state(pipe, default_shaders[PIPE_SHADER_FRAGMENT]);
277		pipe->delete_vs_state(pipe, default_shaders[PIPE_SHADER_VERTEX]);
278		if(owns_pipe)
279			pipe->destroy(pipe);
280	}
281
282	virtual unsigned STDMETHODCALLTYPE GetContextFlags(void)
283	{
284		return context_flags;
285	}
/* Extra trailing parameters of XSSetShader/XSGetShader.
 * With API >= 11 they are the class-instance array and its count (each
 * expansion starts with the separating comma); otherwise they expand to nothing.
 */
#if API >= 11
#define SET_SHADER_EXTRA_ARGS , \
	__in_ecount_opt(NumClassInstances)  ID3D11ClassInstance *const *ppClassInstances, \
	unsigned NumClassInstances
#define GET_SHADER_EXTRA_ARGS , \
		__out_ecount_opt(*pNumClassInstances)  ID3D11ClassInstance **ppClassInstances, \
		__inout_opt  unsigned *pNumClassInstances
#else
#define SET_SHADER_EXTRA_ARGS
#define GET_SHADER_EXTRA_ARGS
#endif
297
298/* On Windows D3D11, SetConstantBuffers and SetShaderResources crash if passed a null pointer.
299 * Instead, you have to pass a pointer to nulls to unbind things.
300 * We do the same.
301 * TODO: is D3D10 the same?
302 */
303	template<unsigned s>
304	void xs_set_shader(GalliumD3D11Shader<>* shader)
305	{
306		if(shader != shaders[s].p)
307		{
308			shaders[s] = shader;
309			void* shader_cso = shader ? shader->object : default_shaders[s];
310			switch(s)
311			{
312			case PIPE_SHADER_VERTEX:
313				pipe->bind_vs_state(pipe, shader_cso);
314				break;
315			case PIPE_SHADER_FRAGMENT:
316				pipe->bind_fs_state(pipe, shader_cso);
317				break;
318			case PIPE_SHADER_GEOMETRY:
319				pipe->bind_gs_state(pipe, shader_cso);
320				break;
321			}
322			update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s)) | (1 << (UPDATE_VIEWS_SHIFT + s));
323		}
324	}
325
326	template<unsigned s>
327	void xs_set_constant_buffers(unsigned start, unsigned count, GalliumD3D11Buffer *const *constbufs)
328	{
329		for(unsigned i = 0; i < count; ++i)
330		{
331			if(constbufs[i] != constant_buffers[s][i].p)
332			{
333				constant_buffers[s][i] = constbufs[i];
334				if(s < caps.stages && start + i < caps.constant_buffers[s])
335					pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL);
336			}
337		}
338	}
339
340	template<unsigned s>
341	void xs_set_shader_resources(unsigned start, unsigned count, GalliumD3D11ShaderResourceView *const *srvs)
342	{
343		int last_different = -1;
344		for(unsigned i = 0; i < count; ++i)
345		{
346			if(shader_resource_views[s][start + i].p != srvs[i])
347			{
348				shader_resource_views[s][start + i] = srvs[i];
349				sampler_views[s][start + i] = srvs[i] ? srvs[i]->object : 0;
350				last_different = i;
351			}
352		}
353		if(last_different >= 0)
354		{
355			num_shader_resource_views[s] = std::max(num_shader_resource_views[s], start + last_different + 1);
356			update_flags |= 1 << (UPDATE_VIEWS_SHIFT + s);
357		}
358	}
359
360	template<unsigned s>
361	void xs_set_samplers(unsigned start, unsigned count, GalliumD3D11SamplerState *const *samps)
362	{
363		int last_different = -1;
364		for(unsigned i = 0; i < count; ++i)
365		{
366			if(samplers[s][start + i].p != samps[i])
367			{
368				samplers[s][start + i] = samps[i];
369				sampler_csos[s].v[start + i] = samps[i] ? samps[i]->object : default_sampler;
370			}
371			if(last_different >= 0)
372			{
373				num_samplers[s] = std::max(num_samplers[s], start + last_different + 1);
374				update_flags |= (UPDATE_SAMPLERS_SHIFT + s);
375			}
376		}
377	}
378
/* Generates the eight per-stage methods for stage prefix XS (VS, GS, ...)
 * and interface name part Stage (Vertex, Geometry, ...):
 *   XSSetShader / XSGetShader
 *   XSSetConstantBuffers / XSGetConstantBuffers
 *   XSSetShaderResources / XSGetShaderResources
 *   XSSetSamplers / XSGetSamplers
 * Each method takes SYNCHRONIZED first. Setters forward to the matching
 * xs_set_* template; getters copy out of the tracked arrays through ref().
 */
#define IMPLEMENT_SHADER_STAGE(XS, Stage) \
	virtual void STDMETHODCALLTYPE XS##SetShader( \
		__in_opt  ID3D11##Stage##Shader *pShader \
		SET_SHADER_EXTRA_ARGS) \
	{ \
		SYNCHRONIZED; \
		GalliumD3D11Shader<>* new_shader = (GalliumD3D11Shader<>*)pShader; \
		xs_set_shader<D3D11_STAGE_##XS>(new_shader); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetShader(\
		__out  ID3D11##Stage##Shader **ppShader \
		GET_SHADER_EXTRA_ARGS) \
	{ \
		SYNCHRONIZED; \
		*ppShader = (ID3D11##Stage##Shader*)shaders[D3D11_STAGE_##XS].ref(); \
	} \
	virtual void STDMETHODCALLTYPE XS##SetConstantBuffers(\
		__in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1)  unsigned StartSlot, \
		__in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - StartSlot)  unsigned NumBuffers, \
		__in_ecount(NumBuffers)  ID3D11Buffer *const *ppConstantBuffers) \
	{ \
		SYNCHRONIZED; \
		GalliumD3D11Buffer *const *new_buffers = (GalliumD3D11Buffer *const *)ppConstantBuffers; \
		xs_set_constant_buffers<D3D11_STAGE_##XS>(StartSlot, NumBuffers, new_buffers); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetConstantBuffers(\
		__in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1)  unsigned StartSlot, \
		__in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - StartSlot)  unsigned NumBuffers, \
		__out_ecount(NumBuffers)  ID3D11Buffer **ppConstantBuffers) \
	{ \
		SYNCHRONIZED; \
		for(unsigned n = 0; n < NumBuffers; ++n) \
		{ \
			ppConstantBuffers[n] = constant_buffers[D3D11_STAGE_##XS][StartSlot + n].ref(); \
		} \
	} \
	virtual void STDMETHODCALLTYPE XS##SetShaderResources(\
		__in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT - 1)  unsigned StartSlot, \
		__in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT - StartSlot)  unsigned NumViews, \
		__in_ecount(NumViews)  ID3D11ShaderResourceView *const *ppShaderResourceViews) \
	{ \
		SYNCHRONIZED; \
		GalliumD3D11ShaderResourceView *const *new_views = (GalliumD3D11ShaderResourceView *const *)ppShaderResourceViews; \
		xs_set_shader_resources<D3D11_STAGE_##XS>(StartSlot, NumViews, new_views); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetShaderResources(\
		__in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT - 1)  unsigned StartSlot, \
		__in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT - StartSlot)  unsigned NumViews, \
		__out_ecount(NumViews)  ID3D11ShaderResourceView **ppShaderResourceViews) \
	{ \
		SYNCHRONIZED; \
		for(unsigned n = 0; n < NumViews; ++n) \
		{ \
			ppShaderResourceViews[n] = shader_resource_views[D3D11_STAGE_##XS][StartSlot + n].ref(); \
		} \
	} \
	virtual void STDMETHODCALLTYPE XS##SetSamplers(\
		__in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT - 1)  unsigned StartSlot, \
		__in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT - StartSlot)  unsigned NumSamplers, \
		__in_ecount(NumSamplers)  ID3D11SamplerState *const *ppSamplers) \
	{ \
		SYNCHRONIZED; \
		GalliumD3D11SamplerState *const *new_samplers = (GalliumD3D11SamplerState *const *)ppSamplers; \
		xs_set_samplers<D3D11_STAGE_##XS>(StartSlot, NumSamplers, new_samplers); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetSamplers( \
		__in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT - 1)  unsigned StartSlot, \
		__in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT - StartSlot)  unsigned NumSamplers, \
		__out_ecount(NumSamplers)  ID3D11SamplerState **ppSamplers) \
	{ \
		SYNCHRONIZED; \
		for(unsigned n = 0; n < NumSamplers; ++n) \
		{ \
			ppSamplers[n] = samplers[D3D11_STAGE_##XS][StartSlot + n].ref(); \
		} \
	}
445
/* Per-stage statement wrappers: DO_VS and DO_PS expand to the statement
 * unchanged, DO_GS runs it only when caps.gs is set, and DO_HS, DO_DS and
 * DO_CS drop it entirely.
 */
#define DO_VS(x) x
#define DO_GS(x) do {if(caps.gs) {x;}} while(0)
#define DO_PS(x) x
#define DO_HS(x)
#define DO_DS(x)
#define DO_CS(x)
452	IMPLEMENT_SHADER_STAGE(VS, Vertex)
453	IMPLEMENT_SHADER_STAGE(GS, Geometry)
454	IMPLEMENT_SHADER_STAGE(PS, Pixel)
455
456#if API >= 11
457	IMPLEMENT_SHADER_STAGE(HS, Hull)
458	IMPLEMENT_SHADER_STAGE(DS, Domain)
459	IMPLEMENT_SHADER_STAGE(CS, Compute)
460
461	virtual void STDMETHODCALLTYPE CSSetUnorderedAccessViews(
462		__in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - 1)  unsigned StartSlot,
463		__in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - StartSlot)  unsigned NumUAVs,
464		__in_ecount(NumUAVs)  ID3D11UnorderedAccessView *const *ppUnorderedAccessViews,
465		__in_ecount(NumUAVs)  const unsigned *pUAVInitialCounts)
466	{
467		SYNCHRONIZED;
468		for(unsigned i = 0; i < NumUAVs; ++i)
469			cs_unordered_access_views[StartSlot + i] = ppUnorderedAccessViews[i];
470	}
471
472	virtual void STDMETHODCALLTYPE CSGetUnorderedAccessViews(
473		__in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - 1)  unsigned StartSlot,
474		__in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - StartSlot)  unsigned NumUAVs,
475		__out_ecount(NumUAVs)  ID3D11UnorderedAccessView **ppUnorderedAccessViews)
476	{
477		SYNCHRONIZED;
478		for(unsigned i = 0; i < NumUAVs; ++i)
479			ppUnorderedAccessViews[i] = cs_unordered_access_views[StartSlot + i].ref();
480	}
481#endif
482
483	template<unsigned s>
484	void update_stage()
485	{
486		if(update_flags & (1 << (UPDATE_VIEWS_SHIFT + s)))
487		{
488			while(num_shader_resource_views[s] && !sampler_views[s][num_shader_resource_views[s] - 1]) \
489				--num_shader_resource_views[s];
490			if(s < caps.stages)
491			{
492				struct pipe_sampler_view* views_to_bind[PIPE_MAX_SAMPLERS];
493				unsigned num_views_to_bind = shaders[s] ? shaders[s]->slot_to_resource.size() : 0;
494				for(unsigned i = 0; i < num_views_to_bind; ++i)
495				{
496					views_to_bind[i] = sampler_views[s][shaders[s]->slot_to_resource[i]];
497				}
498				switch(s)
499				{
500				case PIPE_SHADER_VERTEX:
501					pipe->set_vertex_sampler_views(pipe, num_views_to_bind, views_to_bind);
502					break;
503				case PIPE_SHADER_FRAGMENT:
504					pipe->set_fragment_sampler_views(pipe, num_views_to_bind, views_to_bind);
505					break;
506				case PIPE_SHADER_GEOMETRY:
507					pipe->set_geometry_sampler_views(pipe, num_views_to_bind, views_to_bind);
508					break;
509				}
510			}
511		}
512
513		if(update_flags & (1 << (UPDATE_SAMPLERS_SHIFT + s)))
514		{
515			while(num_samplers[s] && !sampler_csos[s].v[num_samplers[s] - 1])
516				--num_samplers[s];
517			if(s < caps.stages)
518			{
519				void* samplers_to_bind[PIPE_MAX_SAMPLERS];
520				unsigned num_samplers_to_bind =  shaders[s] ? shaders[s]->slot_to_sampler.size() : 0;
521				for(unsigned i = 0; i < num_samplers_to_bind; ++i)
522				{
523					// index can be -1 to access sampler_csos[s].ld
524					samplers_to_bind[i] = *(sampler_csos[s].v + shaders[s]->slot_to_sampler[i]);
525				}
526				switch(s)
527				{
528				case PIPE_SHADER_VERTEX:
529					pipe->bind_vertex_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
530					break;
531				case PIPE_SHADER_FRAGMENT:
532					pipe->bind_fragment_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
533					break;
534				case PIPE_SHADER_GEOMETRY:
535					pipe->bind_geometry_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
536					break;
537				}
538			}
539		}
540	}
541
542	void update_state()
543	{
544		update_stage<D3D11_STAGE_PS>();
545		update_stage<D3D11_STAGE_VS>();
546		update_stage<D3D11_STAGE_GS>();
547#if API >= 11
548		update_stage<D3D11_STAGE_HS>();
549		update_stage<D3D11_STAGE_DS>();
550		update_stage<D3D11_STAGE_CS>();
551#endif
552
553		if(update_flags & UPDATE_VERTEX_BUFFERS)
554		{
555			while(num_vertex_buffers && !vertex_buffers[num_vertex_buffers - 1].buffer)
556				--num_vertex_buffers;
557			pipe->set_vertex_buffers(pipe, num_vertex_buffers, vertex_buffers);
558		}
559
560		update_flags = 0;
561	}
562
563	virtual void STDMETHODCALLTYPE IASetInputLayout(
564		__in_opt  ID3D11InputLayout *pInputLayout)
565	{
566		SYNCHRONIZED;
567		if(pInputLayout != input_layout.p)
568		{
569			input_layout = pInputLayout;
570			pipe->bind_vertex_elements_state(pipe, pInputLayout ? ((GalliumD3D11InputLayout*)pInputLayout)->object : default_input_layout);
571		}
572	}
573
574	virtual void STDMETHODCALLTYPE IAGetInputLayout(
575		__out  ID3D11InputLayout **ppInputLayout)
576	{
577		SYNCHRONIZED;
578		*ppInputLayout = input_layout.ref();
579	}
580
581	virtual void STDMETHODCALLTYPE IASetVertexBuffers(
582		__in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - 1)  unsigned StartSlot,
583		__in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - StartSlot)  unsigned NumBuffers,
584		__in_ecount(NumBuffers)  ID3D11Buffer *const *ppVertexBuffers,
585		__in_ecount(NumBuffers)  const unsigned *pStrides,
586		__in_ecount(NumBuffers)  const unsigned *pOffsets)
587	{
588		SYNCHRONIZED;
589		int last_different = -1;
590		for(unsigned i = 0; i < NumBuffers; ++i)
591		{
592			ID3D11Buffer* buffer = ppVertexBuffers[i];
593			if(buffer != input_buffers[StartSlot + i].p
594				|| vertex_buffers[StartSlot + i].buffer_offset != pOffsets[i]
595				|| vertex_buffers[StartSlot + i].stride != pOffsets[i]
596			)
597			{
598				input_buffers[StartSlot + i] = buffer;
599				vertex_buffers[StartSlot + i].buffer = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
600				vertex_buffers[StartSlot + i].buffer_offset = pOffsets[i];
601				vertex_buffers[StartSlot + i].stride = pStrides[i];
602				vertex_buffers[StartSlot + i].max_index = ~0;
603				last_different = i;
604			}
605		}
606		if(last_different >= 0)
607		{
608			num_vertex_buffers = std::max(num_vertex_buffers, StartSlot + NumBuffers);
609			update_flags |= UPDATE_VERTEX_BUFFERS;
610		}
611	}
612
613	virtual void STDMETHODCALLTYPE IAGetVertexBuffers(
614		__in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - 1)  unsigned StartSlot,
615		__in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - StartSlot)  unsigned NumBuffers,
616		__out_ecount_opt(NumBuffers)  ID3D11Buffer **ppVertexBuffers,
617		__out_ecount_opt(NumBuffers)  unsigned *pStrides,
618		__out_ecount_opt(NumBuffers)  unsigned *pOffsets)
619	{
620		SYNCHRONIZED;
621		if(ppVertexBuffers)
622		{
623			for(unsigned i = 0; i < NumBuffers; ++i)
624				ppVertexBuffers[i] = input_buffers[StartSlot + i].ref();
625		}
626
627		if(pOffsets)
628		{
629			for(unsigned i = 0; i < NumBuffers; ++i)
630				pOffsets[i] = vertex_buffers[StartSlot + i].buffer_offset;
631		}
632
633		if(pStrides)
634		{
635			for(unsigned i = 0; i < NumBuffers; ++i)
636				pStrides[i] = vertex_buffers[StartSlot + i].stride;
637		}
638	}
639
640	void set_index_buffer()
641	{
642		pipe_index_buffer ib;
643		if(!index_buffer)
644		{
645			memset(&ib, 0, sizeof(ib));
646		}
647		else
648		{
649			if(index_format == DXGI_FORMAT_R32_UINT)
650				ib.index_size = 4;
651			else if(index_format == DXGI_FORMAT_R16_UINT)
652				ib.index_size = 2;
653			else
654				ib.index_size = 1;
655			ib.offset = index_offset;
656			ib.buffer = index_buffer ? ((GalliumD3D11Buffer*)index_buffer.p)->resource : 0;
657		}
658		pipe->set_index_buffer(pipe, &ib);
659	}
660
661	virtual void STDMETHODCALLTYPE IASetIndexBuffer(
662		__in_opt  ID3D11Buffer *pIndexBuffer,
663		__in  DXGI_FORMAT Format,
664		__in  unsigned Offset)
665	{
666		SYNCHRONIZED;
667		if(index_buffer.p != pIndexBuffer || index_format != Format || index_offset != Offset)
668		{
669			index_buffer = pIndexBuffer;
670			index_format = Format;
671			index_offset = Offset;
672
673			set_index_buffer();
674		}
675	}
676
677	virtual void STDMETHODCALLTYPE IAGetIndexBuffer(
678		__out_opt  ID3D11Buffer **pIndexBuffer,
679		__out_opt  DXGI_FORMAT *Format,
680		__out_opt  unsigned *Offset)
681	{
682		SYNCHRONIZED;
683		if(pIndexBuffer)
684			*pIndexBuffer = index_buffer.ref();
685		if(Format)
686			*Format = index_format;
687		if(Offset)
688			*Offset = index_offset;
689	}
690
691	virtual void STDMETHODCALLTYPE IASetPrimitiveTopology(
692		__in  D3D11_PRIMITIVE_TOPOLOGY Topology)
693	{
694		SYNCHRONIZED;
695		if(primitive_topology != Topology)
696		{
697			if(Topology < D3D_PRIMITIVE_TOPOLOGY_COUNT)
698				primitive_mode = d3d_to_pipe_prim[Topology];
699			else
700				primitive_mode = 0;
701			primitive_topology = Topology;
702		}
703	}
704
705	virtual void STDMETHODCALLTYPE IAGetPrimitiveTopology(
706		__out  D3D11_PRIMITIVE_TOPOLOGY *pTopology)
707	{
708		SYNCHRONIZED;
709		*pTopology = primitive_topology;
710	}
711
712	virtual void STDMETHODCALLTYPE DrawIndexed(
713		__in  unsigned IndexCount,
714		__in  unsigned StartIndexLocation,
715		__in  int BaseVertexLocation)
716	{
717		SYNCHRONIZED;
718		if(update_flags)
719			update_state();
720
721		pipe_draw_info info;
722		info.mode = primitive_mode;
723		info.indexed = TRUE;
724		info.count = IndexCount;
725		info.start = StartIndexLocation;
726		info.index_bias = BaseVertexLocation;
727		info.min_index = 0;
728		info.max_index = ~0;
729		info.start_instance = 0;
730		info.instance_count = 1;
731
732		pipe->draw_vbo(pipe, &info);
733	}
734
735	virtual void STDMETHODCALLTYPE Draw(
736		__in  unsigned VertexCount,
737		__in  unsigned StartVertexLocation)
738	{
739		SYNCHRONIZED;
740		if(update_flags)
741			update_state();
742
743		pipe_draw_info info;
744		info.mode = primitive_mode;
745		info.indexed = FALSE;
746		info.count = VertexCount;
747		info.start = StartVertexLocation;
748		info.index_bias = 0;
749		info.min_index = 0;
750		info.max_index = ~0;
751		info.start_instance = 0;
752		info.instance_count = 1;
753
754		pipe->draw_vbo(pipe, &info);
755	}
756
757	virtual void STDMETHODCALLTYPE DrawIndexedInstanced(
758		__in  unsigned IndexCountPerInstance,
759		__in  unsigned InstanceCount,
760		__in  unsigned StartIndexLocation,
761		__in  int BaseVertexLocation,
762		__in  unsigned StartInstanceLocation)
763	{
764		SYNCHRONIZED;
765		if(update_flags)
766			update_state();
767
768		pipe_draw_info info;
769		info.mode = primitive_mode;
770		info.indexed = TRUE;
771		info.count = IndexCountPerInstance;
772		info.start = StartIndexLocation;
773		info.index_bias = BaseVertexLocation;
774		info.min_index = 0;
775		info.max_index = ~0;
776		info.start_instance = StartInstanceLocation;
777		info.instance_count = InstanceCount;
778
779		pipe->draw_vbo(pipe, &info);
780	}
781
782	virtual void STDMETHODCALLTYPE DrawInstanced(
783		__in  unsigned VertexCountPerInstance,
784		__in  unsigned InstanceCount,
785		__in  unsigned StartVertexLocation,
786		__in  unsigned StartInstanceLocation)
787	{
788		SYNCHRONIZED;
789		if(update_flags)
790			update_state();
791
792		pipe_draw_info info;
793		info.mode = primitive_mode;
794		info.indexed = FALSE;
795		info.count = VertexCountPerInstance;
796		info.start = StartVertexLocation;
797		info.index_bias = 0;
798		info.min_index = 0;
799		info.max_index = ~0;
800		info.start_instance = StartInstanceLocation;
801		info.instance_count = InstanceCount;
802
803		pipe->draw_vbo(pipe, &info);
804	}
805
806	virtual void STDMETHODCALLTYPE DrawAuto(void)
807	{
808		if(!caps.so)
809			return;
810
811		SYNCHRONIZED;
812		if(update_flags)
813			update_state();
814
815		pipe->draw_stream_output(pipe, primitive_mode);
816	}
817
818	virtual void STDMETHODCALLTYPE DrawIndexedInstancedIndirect(
819		__in  ID3D11Buffer *pBufferForArgs,
820		__in  unsigned AlignedByteOffsetForArgs)
821	{
822		SYNCHRONIZED;
823		if(update_flags)
824			update_state();
825
826		struct {
827			unsigned count;
828			unsigned instance_count;
829			unsigned start;
830			unsigned index_bias;
831		} data;
832
833		pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)pBufferForArgs)->resource, AlignedByteOffsetForArgs, sizeof(data), &data);
834
835		pipe_draw_info info;
836		info.mode = primitive_mode;
837		info.indexed = TRUE;
838		info.start = data.start;
839		info.count = data.count;
840		info.index_bias = data.index_bias;
841		info.min_index = 0;
842		info.max_index = ~0;
843		info.start_instance = 0;
844		info.instance_count = data.instance_count;
845
846		pipe->draw_vbo(pipe, &info);
847	}
848
849	virtual void STDMETHODCALLTYPE DrawInstancedIndirect(
850		__in  ID3D11Buffer *pBufferForArgs,
851		__in  unsigned AlignedByteOffsetForArgs)
852	{
853		SYNCHRONIZED;
854		if(update_flags)
855			update_state();
856
857		struct {
858			unsigned count;
859			unsigned instance_count;
860			unsigned start;
861		} data;
862
863		pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)pBufferForArgs)->resource, AlignedByteOffsetForArgs, sizeof(data), &data);
864
865		pipe_draw_info info;
866		info.mode = primitive_mode;
867		info.indexed = FALSE;
868		info.start = data.start;
869		info.count = data.count;
870		info.index_bias = 0;
871		info.min_index = 0;
872		info.max_index = ~0;
873		info.start_instance = 0;
874		info.instance_count = data.instance_count;
875
876		pipe->draw_vbo(pipe, &info);
877	}
878
#if API >= 11
	/* Compute dispatch is not implemented: this method is an empty stub
	 * that ignores its arguments.
	 */
	virtual void STDMETHODCALLTYPE Dispatch(
		__in  unsigned ThreadGroupCountX,
		__in  unsigned ThreadGroupCountY,
		__in  unsigned ThreadGroupCountZ)
	{
// TODO: enable the locking and state flush below once dispatch is implemented
//		SYNCHRONIZED;
//		if(update_flags)
//			update_state();
	}

	/* Indirect compute dispatch is not implemented: this method is an empty
	 * stub that ignores its arguments.
	 */
	virtual void STDMETHODCALLTYPE DispatchIndirect(
		__in  ID3D11Buffer *pBufferForArgs,
		__in  unsigned AlignedByteOffsetForArgs)
	{
// TODO: enable the locking and state flush below once dispatch is implemented
//		SYNCHRONIZED;
//		if(update_flags)
//			update_state();
	}
#endif
901
902	void set_clip()
903	{
904		SYNCHRONIZED;
905		pipe_clip_state clip;
906		clip.nr = 0;
907		clip.depth_clamp = depth_clamp;
908		pipe->set_clip_state(pipe, &clip);
909	}
910
911	virtual void STDMETHODCALLTYPE RSSetState(
912		__in_opt  ID3D11RasterizerState *pRasterizerState)
913	{
914		SYNCHRONIZED;
915		if(pRasterizerState != rasterizer_state.p)
916		{
917			rasterizer_state = pRasterizerState;
918			pipe->bind_rasterizer_state(pipe, pRasterizerState ? ((GalliumD3D11RasterizerState*)pRasterizerState)->object : default_rasterizer);
919			bool new_depth_clamp = pRasterizerState ? ((GalliumD3D11RasterizerState*)pRasterizerState)->depth_clamp : false;
920			if(depth_clamp != new_depth_clamp)
921			{
922				depth_clamp = new_depth_clamp;
923				set_clip();
924			}
925		}
926	}
927
928	virtual void STDMETHODCALLTYPE RSGetState(
929		__out  ID3D11RasterizerState **ppRasterizerState)
930	{
931		SYNCHRONIZED;
932		*ppRasterizerState = rasterizer_state.ref();
933	}
934
935	void set_viewport()
936	{
937		// TODO: is depth correct? it seems D3D10/11 uses a [-1,1]x[-1,1]x[0,1] cube
938		pipe_viewport_state viewport;
939		float half_width = viewports[0].Width * 0.5f;
940		float half_height = viewports[0].Height * 0.5f;
941
942		viewport.scale[0] = half_width;
943		viewport.scale[1] = -half_height;
944		viewport.scale[2] = (viewports[0].MaxDepth - viewports[0].MinDepth);
945		viewport.scale[3] = 1.0f;
946		viewport.translate[0] = half_width + viewports[0].TopLeftX;
947		viewport.translate[1] = half_height + viewports[0].TopLeftY;
948		viewport.translate[2] = viewports[0].MinDepth;
949		viewport.translate[3] = 1.0f;
950		pipe->set_viewport_state(pipe, &viewport);
951	}
952
953	virtual void STDMETHODCALLTYPE RSSetViewports(
954		__in_range(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE)  unsigned NumViewports,
955		__in_ecount_opt(NumViewports)  const D3D11_VIEWPORT *pViewports)
956	{
957		SYNCHRONIZED;
958		if(NumViewports)
959		{
960			if(memcmp(&viewports[0], &pViewports[0], sizeof(viewports[0])))
961			{
962				viewports[0] = pViewports[0];
963				set_viewport();
964			}
965			for(unsigned i = 1; i < NumViewports; ++i)
966				viewports[i] = pViewports[i];
967		}
968		else if(num_viewports)
969		{
970			// TODO: what should we do here?
971			memset(&viewports[0], 0, sizeof(viewports[0]));
972			set_viewport();
973		}
974		num_viewports = NumViewports;
975	}
976
977	virtual void STDMETHODCALLTYPE RSGetViewports(
978		__inout_range(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE)   unsigned *pNumViewports,
979		__out_ecount_opt(*pNumViewports)  D3D11_VIEWPORT *pViewports)
980	{
981		SYNCHRONIZED;
982		if(pViewports)
983		{
984			unsigned i;
985			for(i = 0; i < std::min(*pNumViewports, num_viewports); ++i)
986				pViewports[i] = viewports[i];
987
988			memset(pViewports + i, 0, (*pNumViewports - i) * sizeof(D3D11_VIEWPORT));
989		}
990
991		*pNumViewports = num_viewports;
992	}
993
994	void set_scissor()
995	{
996		pipe_scissor_state scissor;
997		scissor.minx = scissor_rects[0].left;
998		scissor.miny = scissor_rects[0].top;
999		scissor.maxx = scissor_rects[0].right;
1000		scissor.maxy = scissor_rects[0].bottom;
1001		pipe->set_scissor_state(pipe, &scissor);
1002	}
1003
1004	virtual void STDMETHODCALLTYPE RSSetScissorRects(
1005		__in_range(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE)  unsigned NumRects,
1006		__in_ecount_opt(NumRects)  const D3D11_RECT *pRects)
1007	{
1008		SYNCHRONIZED;
1009		if(NumRects)
1010		{
1011			if(memcmp(&scissor_rects[0], &pRects[0], sizeof(scissor_rects[0])))
1012			{
1013				scissor_rects[0] = pRects[0];
1014				set_scissor();
1015			}
1016			for(unsigned i = 1; i < NumRects; ++i)
1017				scissor_rects[i] = pRects[i];
1018		}
1019		else if(num_scissor_rects)
1020		{
1021			// TODO: what should we do here?
1022			memset(&scissor_rects[0], 0, sizeof(scissor_rects[0]));
1023			set_scissor();
1024		}
1025
1026		num_scissor_rects = NumRects;
1027	}
1028
1029	virtual void STDMETHODCALLTYPE RSGetScissorRects(
1030		__inout_range(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE)   unsigned *pNumRects,
1031		__out_ecount_opt(*pNumRects)  D3D11_RECT *pRects)
1032	{
1033		SYNCHRONIZED;
1034		if(pRects)
1035		{
1036			unsigned i;
1037			for(i = 0; i < std::min(*pNumRects, num_scissor_rects); ++i)
1038				pRects[i] = scissor_rects[i];
1039
1040			memset(pRects + i, 0, (*pNumRects - i) * sizeof(D3D11_RECT));
1041		}
1042
1043		*pNumRects = num_scissor_rects;
1044	}
1045
1046	virtual void STDMETHODCALLTYPE OMSetBlendState(
1047		__in_opt  ID3D11BlendState *pBlendState,
1048		__in_opt  const float BlendFactor[ 4 ],
1049		__in  unsigned SampleMask)
1050	{
1051		SYNCHRONIZED;
1052		float white[4] = {1.0f, 1.0f, 1.0f, 1.0f};
1053
1054		if(blend_state.p != pBlendState)
1055		{
1056			pipe->bind_blend_state(pipe, pBlendState ? ((GalliumD3D11BlendState*)pBlendState)->object : default_blend);
1057			blend_state = pBlendState;
1058		}
1059
1060		// Windows D3D11 does this, even though it's apparently undocumented
1061		if(!BlendFactor)
1062			BlendFactor = white;
1063
1064		if(memcmp(blend_color, BlendFactor, sizeof(blend_color)))
1065		{
1066			pipe->set_blend_color(pipe, (struct pipe_blend_color*)BlendFactor);
1067			memcpy(blend_color, BlendFactor, sizeof(blend_color));
1068		}
1069
1070		if(sample_mask != SampleMask)
1071		{
1072			pipe->set_sample_mask(pipe, sample_mask);
1073			sample_mask = SampleMask;
1074		}
1075	}
1076
1077	virtual void STDMETHODCALLTYPE OMGetBlendState(
1078		__out_opt  ID3D11BlendState **ppBlendState,
1079		__out_opt  float BlendFactor[ 4 ],
1080		__out_opt  unsigned *pSampleMask)
1081	{
1082		SYNCHRONIZED;
1083		if(ppBlendState)
1084			*ppBlendState = blend_state.ref();
1085		if(BlendFactor)
1086			memcpy(BlendFactor, blend_color, sizeof(blend_color));
1087		if(pSampleMask)
1088			*pSampleMask = sample_mask;
1089	}
1090
1091	void set_stencil_ref()
1092	{
1093		struct pipe_stencil_ref sref;
1094		sref.ref_value[0] = stencil_ref;
1095		sref.ref_value[1] = stencil_ref;
1096		pipe->set_stencil_ref(pipe, &sref);
1097	}
1098
1099	virtual void STDMETHODCALLTYPE OMSetDepthStencilState(
1100		__in_opt  ID3D11DepthStencilState *pDepthStencilState,
1101		__in  unsigned StencilRef)
1102	{
1103		SYNCHRONIZED;
1104		if(pDepthStencilState != depth_stencil_state.p)
1105		{
1106			pipe->bind_depth_stencil_alpha_state(pipe, pDepthStencilState ? ((GalliumD3D11DepthStencilState*)pDepthStencilState)->object : default_depth_stencil);
1107			depth_stencil_state = pDepthStencilState;
1108		}
1109
1110		if(StencilRef != stencil_ref)
1111		{
1112			stencil_ref = StencilRef;
1113			set_stencil_ref();
1114		}
1115	}
1116
1117	virtual void STDMETHODCALLTYPE OMGetDepthStencilState(
1118		__out_opt  ID3D11DepthStencilState **ppDepthStencilState,
1119		__out_opt  unsigned *pStencilRef)
1120	{
1121		SYNCHRONIZED;
1122		if(*ppDepthStencilState)
1123			*ppDepthStencilState = depth_stencil_state.ref();
1124		if(pStencilRef)
1125			*pStencilRef = stencil_ref;
1126	}
1127
1128	void set_framebuffer()
1129	{
1130		struct pipe_framebuffer_state fb;
1131		memset(&fb, 0, sizeof(fb));
1132		if(depth_stencil_view)
1133		{
1134			struct pipe_surface* surf = ((GalliumD3D11DepthStencilView*)depth_stencil_view.p)->object;
1135			fb.zsbuf = surf;
1136			if(surf->width > fb.width)
1137				fb.width = surf->width;
1138			if(surf->height > fb.height)
1139				fb.height = surf->height;
1140		}
1141		fb.nr_cbufs = num_render_target_views;
1142		unsigned i;
1143		for(i = 0; i < num_render_target_views; ++i)
1144		{
1145			if(render_target_views[i])
1146			{
1147				struct pipe_surface* surf = ((GalliumD3D11RenderTargetView*)render_target_views[i].p)->object;
1148				fb.cbufs[i] = surf;
1149				if(surf->width > fb.width)
1150					fb.width = surf->width;
1151				if(surf->height > fb.height)
1152					fb.height = surf->height;
1153			}
1154		}
1155
1156		pipe->set_framebuffer_state(pipe, &fb);
1157	}
1158
1159	/* TODO: the docs say that we should unbind conflicting resources (e.g. those bound for read while we are binding them for write too), but we aren't.
1160	 * Hopefully nobody relies on this happening
1161	 */
1162
1163	virtual void STDMETHODCALLTYPE OMSetRenderTargets(
1164		__in_range(0, D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT)  unsigned NumViews,
1165		__in_ecount_opt(NumViews)  ID3D11RenderTargetView *const *ppRenderTargetViews,
1166		__in_opt  ID3D11DepthStencilView *pDepthStencilView)
1167	{
1168		SYNCHRONIZED;
1169		if(!ppRenderTargetViews)
1170			NumViews = 0;
1171		if(NumViews == num_render_target_views)
1172		{
1173			for(unsigned i = 0; i < NumViews; ++i)
1174			{
1175				if(ppRenderTargetViews[i] != render_target_views[i].p)
1176					goto changed;
1177			}
1178			return;
1179		}
1180changed:
1181		depth_stencil_view = pDepthStencilView;
1182		unsigned i;
1183		for(i = 0; i < NumViews; ++i)
1184		{
1185			render_target_views[i] = ppRenderTargetViews[i];
1186#if API >= 11
1187			om_unordered_access_views[i] = (ID3D11UnorderedAccessView*)NULL;
1188#endif
1189		}
1190		for(; i < num_render_target_views; ++i)
1191			render_target_views[i] = (ID3D11RenderTargetView*)NULL;
1192		num_render_target_views = NumViews;
1193		set_framebuffer();
1194	}
1195
1196	virtual void STDMETHODCALLTYPE OMGetRenderTargets(
1197		__in_range(0, D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT)  unsigned NumViews,
1198		__out_ecount_opt(NumViews)  ID3D11RenderTargetView **ppRenderTargetViews,
1199		__out_opt  ID3D11DepthStencilView **ppDepthStencilView)
1200	{
1201		SYNCHRONIZED;
1202		if(ppRenderTargetViews)
1203		{
1204			unsigned i;
1205			for(i = 0; i < std::min(num_render_target_views, NumViews); ++i)
1206				ppRenderTargetViews[i] = render_target_views[i].ref();
1207
1208			for(; i < NumViews; ++i)
1209				ppRenderTargetViews[i] = 0;
1210		}
1211
1212		if(ppDepthStencilView)
1213			*ppDepthStencilView = depth_stencil_view.ref();
1214	}
1215
1216#if API >= 11
1217	/* TODO: what is this supposed to do _exactly_? are we doing the right thing? */
1218	virtual void STDMETHODCALLTYPE OMSetRenderTargetsAndUnorderedAccessViews(
1219		__in  unsigned NumRTVs,
1220		__in_ecount_opt(NumRTVs)  ID3D11RenderTargetView *const *ppRenderTargetViews,
1221		__in_opt  ID3D11DepthStencilView *pDepthStencilView,
1222		__in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - 1)  unsigned UAVStartSlot,
1223		__in  unsigned NumUAVs,
1224		__in_ecount_opt(NumUAVs)  ID3D11UnorderedAccessView *const *ppUnorderedAccessViews,
1225		__in_ecount_opt(NumUAVs)  const unsigned *pUAVInitialCounts)
1226	{
1227		SYNCHRONIZED;
1228		if(NumRTVs != D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL)
1229			OMSetRenderTargets(NumRTVs, ppRenderTargetViews, pDepthStencilView);
1230
1231		if(NumUAVs != D3D11_KEEP_UNORDERED_ACCESS_VIEWS)
1232		{
1233			for(unsigned i = 0; i < NumUAVs; ++i)
1234			{
1235				om_unordered_access_views[UAVStartSlot + i] = ppUnorderedAccessViews[i];
1236				render_target_views[UAVStartSlot + i] = (ID3D11RenderTargetView*)0;
1237			}
1238		}
1239	}
1240
1241	virtual void STDMETHODCALLTYPE OMGetRenderTargetsAndUnorderedAccessViews(
1242		__in_range(0, D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT)  unsigned NumRTVs,
1243		__out_ecount_opt(NumRTVs)  ID3D11RenderTargetView **ppRenderTargetViews,
1244		__out_opt  ID3D11DepthStencilView **ppDepthStencilView,
1245		__in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - 1)  unsigned UAVStartSlot,
1246		__in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - UAVStartSlot)  unsigned NumUAVs,
1247		__out_ecount_opt(NumUAVs)  ID3D11UnorderedAccessView **ppUnorderedAccessViews)
1248	{
1249		SYNCHRONIZED;
1250		if(ppRenderTargetViews)
1251			OMGetRenderTargets(NumRTVs, ppRenderTargetViews, ppDepthStencilView);
1252
1253		if(ppUnorderedAccessViews)
1254		{
1255			for(unsigned i = 0; i < NumUAVs; ++i)
1256				ppUnorderedAccessViews[i] = om_unordered_access_views[UAVStartSlot + i].ref();
1257		}
1258	}
1259#endif
1260
1261	virtual void STDMETHODCALLTYPE SOSetTargets(
1262		__in_range(0, D3D11_SO_BUFFER_SLOT_COUNT)  unsigned NumBuffers,
1263		__in_ecount_opt(NumBuffers)  ID3D11Buffer *const *ppSOTargets,
1264		__in_ecount_opt(NumBuffers)  const unsigned *pOffsets)
1265	{
1266		SYNCHRONIZED;
1267		unsigned i;
1268		if(!ppSOTargets)
1269			NumBuffers = 0;
1270		bool changed = false;
1271		for(i = 0; i < NumBuffers; ++i)
1272		{
1273			ID3D11Buffer* buffer = ppSOTargets[i];
1274			if(buffer != so_targets[i].p || pOffsets[i] != so_offsets[i])
1275			{
1276				so_buffers[i] = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
1277				so_targets[i] = buffer;
1278				so_offsets[i] = pOffsets[i];
1279				changed = true;
1280			}
1281		}
1282		for(; i < D3D11_SO_BUFFER_SLOT_COUNT; ++i)
1283		{
1284			if(so_targets[i].p || so_offsets[i])
1285			{
1286				changed = true;
1287				so_targets[i] = (ID3D11Buffer*)0;
1288				so_offsets[i] = 0;
1289			}
1290		}
1291		num_so_targets = NumBuffers;
1292
1293		if(changed && caps.so)
1294			pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1295	}
1296
1297	virtual void STDMETHODCALLTYPE SOGetTargets(
1298		__in_range(0, D3D11_SO_BUFFER_SLOT_COUNT)  unsigned NumBuffers,
1299		__out_ecount(NumBuffers)  ID3D11Buffer **ppSOTargets
1300#if API < 11
1301		, __out_ecount(NumBuffers)  UINT *pOffsets
1302#endif
1303		)
1304	{
1305		SYNCHRONIZED;
1306		for(unsigned i = 0; i < NumBuffers; ++i)
1307		{
1308			ppSOTargets[i] = so_targets[i].ref();
1309#if API < 11
1310			pOffsets[i] = so_offsets[i];
1311#endif
1312		}
1313	}
1314
1315	virtual void STDMETHODCALLTYPE Begin(
1316		__in  ID3D11Asynchronous *pAsync)
1317	{
1318		SYNCHRONIZED;
1319		if(caps.queries)
1320			pipe->begin_query(pipe, ((GalliumD3D11Asynchronous<>*)pAsync)->query);
1321	}
1322
1323	virtual void STDMETHODCALLTYPE End(
1324		__in  ID3D11Asynchronous *pAsync)
1325	{
1326		SYNCHRONIZED;
1327		if(caps.queries)
1328			pipe->end_query(pipe, ((GalliumD3D11Asynchronous<>*)pAsync)->query);
1329	}
1330
1331	virtual HRESULT STDMETHODCALLTYPE GetData(
1332		__in  ID3D11Asynchronous *pAsync,
1333		__out_bcount_opt(DataSize)  void *pData,
1334		__in  unsigned DataSize,
1335		__in  unsigned GetDataFlags)
1336	{
1337		SYNCHRONIZED;
1338		if(!caps.queries)
1339			return E_NOTIMPL;
1340
1341		GalliumD3D11Asynchronous<>* async = (GalliumD3D11Asynchronous<>*)pAsync;
1342		void* data = alloca(async->data_size);
1343		boolean ret = pipe->get_query_result(pipe, ((GalliumD3D11Asynchronous<>*)pAsync)->query, !(GetDataFlags & D3D11_ASYNC_GETDATA_DONOTFLUSH), data);
1344		if(pData)
1345			memcpy(pData, data, std::min(async->data_size, DataSize));
1346		return ret ? S_OK : S_FALSE;
1347	}
1348
1349	void set_render_condition()
1350	{
1351		if(caps.render_condition)
1352		{
1353			if(!render_predicate)
1354				pipe->render_condition(pipe, 0, 0);
1355			else
1356			{
1357				GalliumD3D11Predicate* predicate = (GalliumD3D11Predicate*)render_predicate.p;
1358				if(!render_predicate_value && predicate->desc.Query == D3D11_QUERY_OCCLUSION_PREDICATE)
1359				{
1360					unsigned mode = (predicate->desc.MiscFlags & D3D11_QUERY_MISC_PREDICATEHINT) ? PIPE_RENDER_COND_NO_WAIT : PIPE_RENDER_COND_WAIT;
1361					pipe->render_condition(pipe, predicate->query, mode);
1362				}
1363				else
1364				{
1365					/* TODO: add inverted predication to Gallium*/
1366					pipe->render_condition(pipe, 0, 0);
1367				}
1368			}
1369		}
1370	}
1371
1372	virtual void STDMETHODCALLTYPE SetPredication(
1373		__in_opt  ID3D11Predicate *pPredicate,
1374		__in  BOOL PredicateValue)
1375	{
1376		SYNCHRONIZED;
1377		if(render_predicate.p != pPredicate || render_predicate_value != PredicateValue)
1378		{
1379			render_predicate = pPredicate;
1380			render_predicate_value = PredicateValue;
1381			set_render_condition();
1382		}
1383	}
1384
1385	virtual void STDMETHODCALLTYPE GetPredication(
1386		__out_opt  ID3D11Predicate **ppPredicate,
1387		__out_opt  BOOL *pPredicateValue)
1388	{
1389		SYNCHRONIZED;
1390		if(ppPredicate)
1391			*ppPredicate = render_predicate.ref();
1392		if(pPredicateValue)
1393			*pPredicateValue = render_predicate_value;
1394	}
1395
1396	static pipe_subresource d3d11_to_pipe_subresource(struct pipe_resource* resource, unsigned subresource)
1397	{
1398		pipe_subresource sr;
1399		if(subresource <= resource->last_level)
1400		{
1401			sr.level = subresource;
1402			sr.face = 0;
1403		}
1404		else
1405		{
1406			unsigned levels = resource->last_level + 1;
1407			sr.level = subresource % levels;
1408			sr.face = subresource / levels;
1409		}
1410		return sr;
1411	}
1412
1413	virtual HRESULT STDMETHODCALLTYPE Map(
1414		__in  ID3D11Resource *pResource,
1415		__in  unsigned Subresource,
1416		__in  D3D11_MAP MapType,
1417		__in  unsigned MapFlags,
1418		__out  D3D11_MAPPED_SUBRESOURCE *pMappedResource)
1419	{
1420		SYNCHRONIZED;
1421		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)pResource;
1422		if(resource->transfers.count(Subresource))
1423			return E_FAIL;
1424		pipe_subresource sr = d3d11_to_pipe_subresource(resource->resource, Subresource);
1425		pipe_box box;
1426		d3d11_to_pipe_box(resource->resource, sr.level, 0);
1427		unsigned usage = 0;
1428		if(MapType == D3D11_MAP_READ)
1429			usage = PIPE_TRANSFER_READ;
1430		else if(MapType == D3D11_MAP_WRITE)
1431			usage = PIPE_TRANSFER_WRITE;
1432		else if(MapType == D3D11_MAP_READ_WRITE)
1433			usage = PIPE_TRANSFER_READ_WRITE;
1434		else if(MapType == D3D11_MAP_WRITE_DISCARD)
1435			usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD;
1436		else if(MapType == D3D11_MAP_WRITE_NO_OVERWRITE)
1437			usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_NOOVERWRITE;
1438		else
1439			return E_INVALIDARG;
1440		if(MapType & D3D10_MAP_FLAG_DO_NOT_WAIT)
1441			usage |= PIPE_TRANSFER_DONTBLOCK;
1442		struct pipe_transfer* transfer = pipe->get_transfer(pipe, resource->resource, sr, usage, &box);
1443		if(!transfer) {
1444			if(MapType & D3D10_MAP_FLAG_DO_NOT_WAIT)
1445				return DXGI_ERROR_WAS_STILL_DRAWING;
1446			else
1447				return E_FAIL;
1448		}
1449		resource->transfers[Subresource] = transfer;
1450		pipe->transfer_map(pipe, transfer);
1451		pMappedResource->pData = transfer->data;
1452		pMappedResource->RowPitch = transfer->stride;
1453		pMappedResource->DepthPitch = transfer->slice_stride;
1454		return S_OK;
1455	}
1456
1457	virtual void STDMETHODCALLTYPE Unmap(
1458		__in  ID3D11Resource *pResource,
1459		__in  unsigned Subresource)
1460	{
1461		SYNCHRONIZED;
1462		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)pResource;
1463		std::unordered_map<unsigned, pipe_transfer*>::iterator i = resource->transfers.find(Subresource);
1464		if(i != resource->transfers.end())
1465		{
1466			pipe->transfer_unmap(pipe, i->second);
1467			pipe->transfer_destroy(pipe,  i->second);
1468			resource->transfers.erase(i);
1469		}
1470	}
1471
1472	virtual void STDMETHODCALLTYPE CopySubresourceRegion(
1473		__in  ID3D11Resource *pDstResource,
1474		__in  unsigned DstSubresource,
1475		__in  unsigned DstX,
1476		__in  unsigned DstY,
1477		__in  unsigned DstZ,
1478		__in  ID3D11Resource *pSrcResource,
1479		__in  unsigned SrcSubresource,
1480		__in_opt  const D3D11_BOX *pSrcBox)
1481	{
1482		SYNCHRONIZED;
1483		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)pDstResource;
1484		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)pSrcResource;
1485		pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, DstSubresource);
1486		pipe_subresource subsrc = d3d11_to_pipe_subresource(src->resource, SrcSubresource);
1487		pipe_box box = d3d11_to_pipe_box(src->resource, subsrc.level, pSrcBox);
1488		for(unsigned i = 0; i < box.depth; ++i)
1489		{
1490			pipe->resource_copy_region(pipe,
1491				dst->resource, subdst, DstX, DstY, DstZ + i,
1492				src->resource, subsrc, box.x, box.y, box.z + i,
1493				box.width, box.height);
1494		}
1495	}
1496
1497	virtual void STDMETHODCALLTYPE CopyResource(
1498		__in  ID3D11Resource *pDstResource,
1499		__in  ID3D11Resource *pSrcResource)
1500	{
1501		SYNCHRONIZED;
1502		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)pDstResource;
1503		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)pSrcResource;
1504		pipe_subresource sr;
1505		unsigned faces = dst->resource->target == PIPE_TEXTURE_CUBE ? 6 : 1;
1506
1507		for(sr.face = 0; sr.face < faces; ++sr.face)
1508		{
1509			for(sr.level = 0; sr.level <= dst->resource->last_level; ++sr.level)
1510			{
1511				unsigned w = u_minify(dst->resource->width0, sr.level);
1512				unsigned h = u_minify(dst->resource->height0, sr.level);
1513				unsigned d = u_minify(dst->resource->depth0, sr.level);
1514				for(unsigned i = 0; i < d; ++i)
1515				{
1516					pipe->resource_copy_region(pipe,
1517							dst->resource, sr, 0, 0, i,
1518							src->resource, sr, 0, 0, i,
1519							w, h);
1520				}
1521			}
1522		}
1523	}
1524
1525	virtual void STDMETHODCALLTYPE UpdateSubresource(
1526		__in  ID3D11Resource *pDstResource,
1527		__in  unsigned DstSubresource,
1528		__in_opt  const D3D11_BOX *pDstBox,
1529		__in  const void *pSrcData,
1530		__in  unsigned SrcRowPitch,
1531		__in  unsigned SrcDepthPitch)
1532	{
1533		SYNCHRONIZED;
1534		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)pDstResource;
1535		pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, DstSubresource);
1536		pipe_box box = d3d11_to_pipe_box(dst->resource, subdst.level, pDstBox);
1537		pipe->transfer_inline_write(pipe, dst->resource, subdst, PIPE_TRANSFER_WRITE, &box, pSrcData, SrcRowPitch, SrcDepthPitch);
1538	}
1539
1540#if API >= 11
1541	virtual void STDMETHODCALLTYPE CopyStructureCount(
1542		__in  ID3D11Buffer *pDstBuffer,
1543		__in  unsigned DstAlignedByteOffset,
1544		__in  ID3D11UnorderedAccessView *pSrcView)
1545	{
1546		SYNCHRONIZED;
1547	}
1548#endif
1549
1550	virtual void STDMETHODCALLTYPE ClearRenderTargetView(
1551		__in  ID3D11RenderTargetView *pRenderTargetView,
1552		__in  const float ColorRGBA[4])
1553	{
1554		SYNCHRONIZED;
1555		GalliumD3D11RenderTargetView* view = ((GalliumD3D11RenderTargetView*)pRenderTargetView);
1556		pipe->clear_render_target(pipe, view->object, ColorRGBA, 0, 0, view->object->width, view->object->height);
1557	}
1558
1559	virtual void STDMETHODCALLTYPE ClearDepthStencilView(
1560		__in  ID3D11DepthStencilView *pDepthStencilView,
1561		__in  unsigned ClearFlags,
1562		__in  float Depth,
1563		__in  UINT8 Stencil)
1564	{
1565		SYNCHRONIZED;
1566		GalliumD3D11DepthStencilView* view = ((GalliumD3D11DepthStencilView*)pDepthStencilView);
1567		unsigned flags = 0;
1568		if(ClearFlags & D3D11_CLEAR_DEPTH)
1569			flags |= PIPE_CLEAR_DEPTH;
1570		if(ClearFlags & D3D11_CLEAR_STENCIL)
1571			flags |= PIPE_CLEAR_STENCIL;
1572		pipe->clear_depth_stencil(pipe, view->object, flags, Depth, Stencil, 0, 0, view->object->width, view->object->height);
1573	}
1574
1575#if API >= 11
1576	virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewUint(
1577		__in  ID3D11UnorderedAccessView *pUnorderedAccessView,
1578		__in  const unsigned Values[ 4 ])
1579	{
1580		SYNCHRONIZED;
1581	}
1582
1583	virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewFloat(
1584			__in  ID3D11UnorderedAccessView *pUnorderedAccessView,
1585			__in  const float Values[ 4 ])
1586	{
1587		SYNCHRONIZED;
1588	}
1589#endif
1590
1591	virtual void STDMETHODCALLTYPE RestoreGalliumStateBlitOnly()
1592	{
1593		pipe->bind_blend_state(pipe, blend_state.p ? blend_state.p->object : default_blend);
1594		pipe->bind_depth_stencil_alpha_state(pipe, depth_stencil_state.p ? depth_stencil_state.p->object : default_depth_stencil);
1595		pipe->bind_rasterizer_state(pipe, rasterizer_state.p ? rasterizer_state.p->object : default_rasterizer);
1596		pipe->bind_vertex_elements_state(pipe, input_layout.p ? input_layout.p->object : default_input_layout);
1597		pipe->bind_fs_state(pipe, shaders[D3D11_STAGE_PS].p ? shaders[D3D11_STAGE_PS].p->object : default_shaders[PIPE_SHADER_FRAGMENT]);
1598		pipe->bind_vs_state(pipe, shaders[D3D11_STAGE_VS].p ? shaders[D3D11_STAGE_VS].p->object : default_shaders[PIPE_SHADER_VERTEX]);
1599		if(caps.gs)
1600			pipe->bind_gs_state(pipe, shaders[D3D11_STAGE_GS].p ? shaders[D3D11_STAGE_GS].p->object : default_shaders[PIPE_SHADER_GEOMETRY]);
1601		set_framebuffer();
1602		set_viewport();
1603		set_clip();
1604		set_render_condition();
1605		// TODO: restore stream output
1606
1607		update_flags |= UPDATE_VERTEX_BUFFERS | (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_PS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_PS));
1608	}
1609
1610	virtual void STDMETHODCALLTYPE GenerateMips(
1611			__in  ID3D11ShaderResourceView *pShaderResourceView)
1612	{
1613		SYNCHRONIZED;
1614
1615		GalliumD3D11ShaderResourceView* view = (GalliumD3D11ShaderResourceView*)pShaderResourceView;
1616		if(caps.gs)
1617			pipe->bind_gs_state(pipe, 0);
1618		if(caps.so)
1619			pipe->bind_stream_output_state(pipe, 0);
1620		if(pipe->render_condition)
1621			pipe->render_condition(pipe, 0, 0);
1622		util_gen_mipmap(gen_mipmap, view->object, 0, 0, view->object->texture->last_level, PIPE_TEX_FILTER_LINEAR);
1623		RestoreGalliumStateBlitOnly();
1624	}
1625
1626	virtual void STDMETHODCALLTYPE RestoreGalliumState()
1627	{
1628		SYNCHRONIZED;
1629		RestoreGalliumStateBlitOnly();
1630
1631		set_index_buffer();
1632		set_stencil_ref();
1633		pipe->set_blend_color(pipe, (struct pipe_blend_color*)blend_color);
1634		pipe->set_sample_mask(pipe, sample_mask);
1635
1636		for(unsigned s = 0; s < 3; ++s)
1637		{
1638			unsigned num = std::min(caps.constant_buffers[s], (unsigned)D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
1639			for(unsigned i = 0; i < num; ++i)
1640				pipe->set_constant_buffer(pipe, s, i, constant_buffers[s][i].p ? constant_buffers[s][i].p->resource : 0);
1641		}
1642
1643		if(caps.so)
1644			pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1645
1646		update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_VS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_VS));
1647		update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_GS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_GS));
1648
1649		set_scissor();
1650	}
1651
1652#if API >= 11
1653	/* TODO: hack SRVs or sampler states to handle this, or add to Gallium */
1654	virtual void STDMETHODCALLTYPE SetResourceMinLOD(
1655		__in  ID3D11Resource *pResource,
1656		float MinLOD)
1657	{
1658		SYNCHRONIZED;
1659		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)pResource;
1660		if(resource->min_lod != MinLOD)
1661		{
1662			// TODO: actually do anything?
1663			resource->min_lod = MinLOD;
1664		}
1665	}
1666
1667	virtual float STDMETHODCALLTYPE GetResourceMinLOD(
1668		__in  ID3D11Resource *pResource)
1669	{
1670		SYNCHRONIZED;
1671		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)pResource;
1672		return resource->min_lod;
1673	}
1674#endif
1675
1676	virtual void STDMETHODCALLTYPE ResolveSubresource(
1677		__in  ID3D11Resource *pDstResource,
1678		__in  unsigned DstSubresource,
1679		__in  ID3D11Resource *pSrcResource,
1680		__in  unsigned SrcSubresource,
1681		__in  DXGI_FORMAT Format)
1682	{
1683		SYNCHRONIZED;
1684		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)pDstResource;
1685		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)pSrcResource;
1686		pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, DstSubresource);
1687		pipe_subresource subsrc = d3d11_to_pipe_subresource(src->resource, SrcSubresource);
1688		pipe->resource_resolve(pipe, dst->resource, subdst, src->resource, subsrc);
1689	}
1690
1691#if API >= 11
1692	virtual void STDMETHODCALLTYPE ExecuteCommandList(
1693		__in  ID3D11CommandList *pCommandList,
1694		BOOL RestoreContextState)
1695	{
1696		SYNCHRONIZED;
1697	}
1698
1699	virtual HRESULT STDMETHODCALLTYPE FinishCommandList(
1700		BOOL RestoreDeferredContextState,
1701		__out_opt  ID3D11CommandList **ppCommandList)
1702	{
1703		SYNCHRONIZED;
1704		return E_NOTIMPL;
1705	}
1706#endif
1707
1708	virtual void STDMETHODCALLTYPE ClearState(void)
1709	{
1710		SYNCHRONIZED;
1711
1712		// we qualify all calls so that we avoid virtual dispatch and might get them inlined
1713		// TODO: make sure all this gets inlined, which might require more compiler flags
1714		// TODO: optimize this
1715#if API >= 11
1716		GalliumD3D11DeviceContext::PSSetShader(0, 0, 0);
1717		GalliumD3D11DeviceContext::GSSetShader(0, 0, 0);
1718		GalliumD3D11DeviceContext::VSSetShader(0, 0, 0);
1719		GalliumD3D11DeviceContext::HSSetShader(0, 0, 0);
1720		GalliumD3D11DeviceContext::DSSetShader(0, 0, 0);
1721		GalliumD3D11DeviceContext::CSSetShader(0, 0, 0);
1722#else
1723		GalliumD3D11DeviceContext::PSSetShader(0);
1724		GalliumD3D11DeviceContext::GSSetShader(0);
1725		GalliumD3D11DeviceContext::VSSetShader(0);
1726#endif
1727
1728		GalliumD3D11DeviceContext::IASetInputLayout(0);
1729		GalliumD3D11DeviceContext::IASetIndexBuffer(0, DXGI_FORMAT_UNKNOWN, 0);
1730		GalliumD3D11DeviceContext::RSSetState(0);
1731		GalliumD3D11DeviceContext::OMSetDepthStencilState(0, 0);
1732		GalliumD3D11DeviceContext::OMSetBlendState(0, (float*)zero_data, ~0);
1733		GalliumD3D11DeviceContext::SetPredication(0, 0);
1734		GalliumD3D11DeviceContext::IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_UNDEFINED);
1735
1736		GalliumD3D11DeviceContext::PSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1737		GalliumD3D11DeviceContext::GSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1738		GalliumD3D11DeviceContext::VSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1739#if API >= 11
1740		GalliumD3D11DeviceContext::HSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1741		GalliumD3D11DeviceContext::DSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1742		GalliumD3D11DeviceContext::CSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1743#endif
1744
1745		GalliumD3D11DeviceContext::IASetVertexBuffers(0, num_vertex_buffers, (ID3D11Buffer**)zero_data, (unsigned*)zero_data, (unsigned*)zero_data);
1746#if API >= 11
1747		GalliumD3D11DeviceContext::OMSetRenderTargetsAndUnorderedAccessViews(0, 0, 0 , 0, 0, 0, 0);
1748#else
1749		GalliumD3D11DeviceContext::OMSetRenderTargets(0, 0, 0 );
1750#endif
1751		GalliumD3D11DeviceContext::SOSetTargets(0, 0, 0);
1752
1753		GalliumD3D11DeviceContext::PSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11ShaderResourceView**)zero_data);
1754		GalliumD3D11DeviceContext::GSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11ShaderResourceView**)zero_data);
1755		GalliumD3D11DeviceContext::VSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11ShaderResourceView**)zero_data);
1756#if API >= 11
1757		GalliumD3D11DeviceContext::HSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11ShaderResourceView**)zero_data);
1758		GalliumD3D11DeviceContext::DSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11ShaderResourceView**)zero_data);
1759		GalliumD3D11DeviceContext::CSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11ShaderResourceView**)zero_data);
1760#endif
1761
1762		GalliumD3D11DeviceContext::PSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11SamplerState**)zero_data);
1763		GalliumD3D11DeviceContext::GSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11SamplerState**)zero_data);
1764		GalliumD3D11DeviceContext::VSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11SamplerState**)zero_data);
1765#if API >= 11
1766		GalliumD3D11DeviceContext::HSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11SamplerState**)zero_data);
1767		GalliumD3D11DeviceContext::DSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11SamplerState**)zero_data);
1768		GalliumD3D11DeviceContext::CSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11SamplerState**)zero_data);
1769#endif
1770
1771		GalliumD3D11DeviceContext::RSSetViewports(0, 0);
1772		GalliumD3D11DeviceContext::RSSetScissorRects(0, 0);
1773	}
1774
1775	virtual void STDMETHODCALLTYPE Flush(void)
1776	{
1777		SYNCHRONIZED;
1778		pipe->flush(pipe, PIPE_FLUSH_FRAME, 0);
1779	}
1780
1781	/* In Direct3D 10, if the reference count of an object drops to 0, it is automatically
1782	 * cleanly unbound from the pipeline.
1783	 * In Direct3D 11, the pipeline holds a reference.
1784	 *
1785	 * Note that instead of always scanning the pipeline on destruction, we could
1786	 * maintain the internal reference count on DirectX 10 and use it to check if an
1787	 * object is still bound.
1788	 * Presumably, on average, scanning is faster if the application is well written.
1789	 */
1790#if API < 11
1791#define IMPLEMENT_SIMPLE_UNBIND(name, member, gallium, def) \
1792	void Unbind##name(ID3D11##name* state) \
1793	{ \
1794		SYNCHRONIZED; \
1795		if((void*)state == (void*)member.p) \
1796		{ \
1797			member.p = 0; \
1798			pipe->bind_##gallium##_state(pipe, default_##def); \
1799		} \
1800	}
1801	IMPLEMENT_SIMPLE_UNBIND(BlendState, blend_state, blend, blend)
1802	IMPLEMENT_SIMPLE_UNBIND(RasterizerState, rasterizer_state, rasterizer, rasterizer)
1803	IMPLEMENT_SIMPLE_UNBIND(DepthStencilState, depth_stencil_state, depth_stencil_alpha, depth_stencil)
1804	IMPLEMENT_SIMPLE_UNBIND(InputLayout, input_layout, vertex_elements, input_layout)
1805	IMPLEMENT_SIMPLE_UNBIND(PixelShader, shaders[D3D11_STAGE_PS], fs, shaders[D3D11_STAGE_PS])
1806	IMPLEMENT_SIMPLE_UNBIND(VertexShader, shaders[D3D11_STAGE_VS], vs, shaders[D3D11_STAGE_VS])
1807	IMPLEMENT_SIMPLE_UNBIND(GeometryShader, shaders[D3D11_STAGE_GS], gs, shaders[D3D11_STAGE_GS])
1808
1809	void UnbindPredicate(ID3D11Predicate* predicate)
1810	{
1811		SYNCHRONIZED;
1812		if(predicate == render_predicate)
1813		{
1814			render_predicate.p = NULL;
1815			render_predicate_value = 0;
1816			pipe->render_condition(pipe, 0, 0);
1817		}
1818	}
1819
1820	void UnbindSamplerState(ID3D11SamplerState* state)
1821	{
1822		SYNCHRONIZED;
1823		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1824		{
1825			for(unsigned i = 0; i < num_samplers[s]; ++i)
1826			{
1827				if(samplers[s][i] == state)
1828				{
1829					samplers[s][i].p = NULL;
1830					sampler_csos[s].v[i] = NULL;
1831					update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s));
1832				}
1833			}
1834		}
1835	}
1836
1837	void UnbindBuffer(ID3D11Buffer* buffer)
1838	{
1839		SYNCHRONIZED;
1840		if(buffer == index_buffer)
1841		{
1842			index_buffer.p = 0;
1843			index_format = DXGI_FORMAT_UNKNOWN;
1844			index_offset = 0;
1845			struct pipe_index_buffer ib;
1846			memset(&ib, 0, sizeof(ib));
1847			pipe->set_index_buffer(pipe, &ib);
1848		}
1849
1850		for(unsigned i = 0; i < num_vertex_buffers; ++i)
1851		{
1852			if(buffer == input_buffers[i])
1853			{
1854				input_buffers[i].p = 0;
1855				memset(&vertex_buffers[num_vertex_buffers], 0, sizeof(vertex_buffers[num_vertex_buffers]));
1856				update_flags |= UPDATE_VERTEX_BUFFERS;
1857			}
1858		}
1859
1860		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1861		{
1862			for(unsigned i = 0; i < sizeof(constant_buffers) / sizeof(constant_buffers[0]); ++i)
1863			{
1864				if(constant_buffers[s][i] == buffer)
1865				{
1866					constant_buffers[s][i] = (ID3D10Buffer*)NULL;
1867					pipe->set_constant_buffer(pipe, s, i, NULL);
1868				}
1869			}
1870		}
1871	}
1872
1873	void UnbindDepthStencilView(ID3D11DepthStencilView* view)
1874	{
1875		SYNCHRONIZED;
1876		if(view == depth_stencil_view)
1877		{
1878			depth_stencil_view.p = NULL;
1879			set_framebuffer();
1880		}
1881	}
1882
1883	void UnbindRenderTargetView(ID3D11RenderTargetView* view)
1884	{
1885		SYNCHRONIZED;
1886		bool any_bound = false;
1887		for(unsigned i = 0; i < num_render_target_views; ++i)
1888		{
1889			if(render_target_views[i] == view)
1890			{
1891				render_target_views[i].p = NULL;
1892				any_bound = true;
1893			}
1894		}
1895		if(any_bound)
1896			set_framebuffer();
1897	}
1898
1899	void UnbindShaderResourceView(ID3D11ShaderResourceView* view)
1900	{
1901		SYNCHRONIZED;
1902		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1903		{
1904			for(unsigned i = 0; i < num_shader_resource_views[s]; ++i)
1905			{
1906				if(shader_resource_views[s][i] == view)
1907				{
1908					shader_resource_views[s][i].p = NULL;
1909					sampler_views[s][i] = NULL;
1910					update_flags |= (1 << (UPDATE_VIEWS_SHIFT + s));
1911				}
1912			}
1913		}
1914	}
1915#endif
1916
1917#undef SYNCHRONIZED
1918};
1919
1920#if API >= 11
1921/* This approach serves two purposes.
1922 * First, we don't want to do an atomic operation to manipulate the reference
1923 * count every time something is bound/unbound to the pipeline, since they are
1924 * expensive.
1925 * Fortunately, the immediate context can only be used by a single thread, so
1926 * we don't have to use them, as long as a separate reference count is used
1927 * (see dual_refcnt_t).
1928 *
1929 * Second, we want to avoid the Device -> DeviceContext -> bound DeviceChild -> Device
1930 * garbage cycle.
1931 * To avoid it, DeviceChild doesn't hold a reference to Device as usual, but adds
1932 * one for each external reference count, while internal nonatomic_add_ref doesn't
1933 * add any.
1934 *
 * Note that ideally we would like to eliminate the non-atomic op too, but this is more
1936 * complicated, since we would either need to use garbage collection and give up
1937 * deterministic destruction (especially bad for large textures), or scan the whole
1938 * pipeline state every time the reference count of object drops to 0, which risks
1939 * pathological slowdowns.
1940 *
1941 * Since this microoptimization should matter relatively little, let's avoid it for now.
1942 *
 * Note that deferred contexts don't use this, since as a whole, they must be thread-safe.
1944 * Eliminating the atomic ops for deferred contexts seems substantially harder.
1945 * This might be a problem if they are used in a one-shot multithreaded rendering
1946 * fashion, where SMP cacheline bouncing on the reference count may be visible.
1947 *
1948 * The idea would be to attach a structure of reference counts indexed by deferred
1949 * context id to each object. Ideally, this should be organized like ext2 block pointers.
1950 *
1951 * Every deferred context would get a reference count in its own cacheline.
1952 * The external count is protected by a lock bit, and there is also a "lock bit" in each
1953 * internal count.
1954 *
1955 * When the external count has to be dropped to 0, the lock bit is taken and all internal
1956 * reference counts are scanned, taking a count of them. A flag would also be set on them.
1957 * Deferred context manipulation would notice the flag, and update the count.
1958 * Once the count goes to zero, the object is freed.
1959 *
1960 * The problem of this is that if the external reference count ping-pongs between
1961 * zero and non-zero, the scans will take a lot of time.
1962 *
1963 * The idea to solve this is to compute the scans in a binary-tree like fashion, where
1964 * each binary tree node would have a "determined bit", which would be invalidated
1965 * by manipulations.
1966 *
1967 * However, all this complexity might actually be a loss in most cases, so let's just
1968 * stick to a single atomic refcnt for now.
1969 *
1970 * Also, we don't even support deferred contexts yet, so this can wait.
1971 */
1972struct nonatomic_device_child_ptr_traits
1973{
1974	static void add_ref(void* p)
1975	{
1976		if(p)
1977			((GalliumD3D11DeviceChild<>*)p)->nonatomic_add_ref();
1978	}
1979
1980	static void release(void* p)
1981	{
1982		if(p)
1983			((GalliumD3D11DeviceChild<>*)p)->nonatomic_release();
1984	}
1985};
1986
1987struct GalliumD3D11ImmediateDeviceContext
1988	: public GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>
1989{
1990	GalliumD3D11ImmediateDeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, unsigned context_flags = 0)
1991	: GalliumD3D11DeviceContext(device, pipe, context_flags)
1992	{
1993		// not necessary, but tests that the API at least basically works
1994		ClearState();
1995	}
1996
1997	/* we do this since otherwise we would have a garbage cycle between this and the device */
1998	virtual ULONG STDMETHODCALLTYPE AddRef()
1999	{
2000		return this->device->AddRef();
2001	}
2002
2003	virtual ULONG STDMETHODCALLTYPE Release()
2004	{
2005		return this->device->Release();
2006	}
2007
2008	virtual D3D11_DEVICE_CONTEXT_TYPE STDMETHODCALLTYPE GetType()
2009	{
2010		return D3D11_DEVICE_CONTEXT_IMMEDIATE;
2011	}
2012};
2013
2014static ID3D11DeviceContext* GalliumD3D11ImmediateDeviceContext_Create(GalliumD3D11Screen* device, struct pipe_context* pipe, bool owns_pipe)
2015{
2016	return new GalliumD3D11ImmediateDeviceContext(device, pipe, owns_pipe);
2017}
2018
2019static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumState(ID3D11DeviceContext* context)
2020{
2021	((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumState();
2022}
2023
2024static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumStateBlitOnly(ID3D11DeviceContext* context)
2025{
2026	((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumStateBlitOnly();
2027}
2028
2029static void GalliumD3D11ImmediateDeviceContext_Destroy(ID3D11DeviceContext* context)
2030{
2031	delete (GalliumD3D11ImmediateDeviceContext*)context;
2032}
2033#endif
2034