d3d11_context.h revision dc4c821f0817a3db716f965692fb701079f66340
169e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal/**************************************************************************
269e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal *
369e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * Copyright 2010 Luca Barbieri
469e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal *
569e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * Permission is hereby granted, free of charge, to any person obtaining
669e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * a copy of this software and associated documentation files (the
769e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * "Software"), to deal in the Software without restriction, including
869e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * without limitation the rights to use, copy, modify, merge, publish,
969e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * distribute, sublicense, and/or sell copies of the Software, and to
1069e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * permit persons to whom the Software is furnished to do so, subject to
1169e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * the following conditions:
1269e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal *
1369e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * The above copyright notice and this permission notice (including the
1469e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * next paragraph) shall be included in all copies or substantial
1569e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * portions of the Software.
1669e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal *
1769e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1869e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1969e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
2069e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
2169e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
2269e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
2369e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2469e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal *
2569e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal **************************************************************************/
2669e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal
2769e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal/* used to unbind things, we need 128 due to resources */
2869e17611504376e4d4603925f8528dfc890fd2c6Luis Sigalstatic const void* zero_data[128];
2969e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal
/* Layout of the update_flags dirty mask:
 *   bit (UPDATE_VIEWS_SHIFT + s)    - sampler views of shader stage s need rebinding
 *   bit (UPDATE_SAMPLERS_SHIFT + s) - sampler states of shader stage s need rebinding
 *   UPDATE_VERTEX_BUFFERS           - the vertex buffer array needs to be re-set
 * Each per-stage group is D3D11_STAGES bits wide.
 */
3069e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#define UPDATE_VIEWS_SHIFT (D3D11_STAGES * 0)
3169e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#define UPDATE_SAMPLERS_SHIFT (D3D11_STAGES * 1)
3269e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#define UPDATE_VERTEX_BUFFERS (1 << (D3D11_STAGES * 2))
3369e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal
3469e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#if API >= 11
3569e17611504376e4d4603925f8528dfc890fd2c6Luis Sigaltemplate<typename PtrTraits>
3669e17611504376e4d4603925f8528dfc890fd2c6Luis Sigalstruct GalliumD3D11DeviceContext :
37	public GalliumD3D11DeviceChild<ID3D11DeviceContext>
38{
39#else
40template<bool threadsafe>
41struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe>
42{
43	typedef simple_ptr_traits PtrTraits;
44	typedef GalliumD3D10Device GalliumD3D10DeviceContext;
45#endif
46
	/* Objects currently bound through the D3D API, held through refcnt_ptr. */
47	refcnt_ptr<GalliumD3D11Shader<>, PtrTraits> shaders[D3D11_STAGES];
48	refcnt_ptr<GalliumD3D11InputLayout, PtrTraits> input_layout;
49	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> index_buffer;
50	refcnt_ptr<GalliumD3D11RasterizerState, PtrTraits> rasterizer_state;
51	refcnt_ptr<GalliumD3D11DepthStencilState, PtrTraits> depth_stencil_state;
52	refcnt_ptr<GalliumD3D11BlendState, PtrTraits> blend_state;
53	refcnt_ptr<GalliumD3D11DepthStencilView, PtrTraits> depth_stencil_view;
54	refcnt_ptr<GalliumD3D11Predicate, PtrTraits> render_predicate;
55
	/* Per-slot bindings; the per-stage arrays are indexed [stage][slot]. */
56	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> constant_buffers[D3D11_STAGES][D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT];
57	refcnt_ptr<GalliumD3D11ShaderResourceView, PtrTraits> shader_resource_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
58	refcnt_ptr<GalliumD3D11SamplerState, PtrTraits> samplers[D3D11_STAGES][D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
59	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> input_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
60	refcnt_ptr<GalliumD3D11RenderTargetView, PtrTraits> render_target_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
61	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> so_buffers[D3D11_SO_BUFFER_SLOT_COUNT];
62
63#if API >= 11
	/* Unordered access view bindings (D3D11 builds only). */
64	refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> cs_unordered_access_views[D3D11_PS_CS_UAV_REGISTER_COUNT];
65	refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> om_unordered_access_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
66#endif
67
	/* Plain-value pipeline state as last set through the API; reset by init_context(). */
68	D3D11_VIEWPORT viewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
69	D3D11_RECT scissor_rects[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
70	D3D11_PRIMITIVE_TOPOLOGY primitive_topology;
71	DXGI_FORMAT index_format;
72	unsigned index_offset;
	/* Restart index handed to indexed draws; set_index_buffer() derives it from index_format. */
73	uint32_t strip_cut_index;
74	BOOL render_predicate_value;
75	float blend_color[4];
76	unsigned sample_mask;
77	unsigned stencil_ref;
78
	/* Driver state objects created in init_context() and bound whenever the
	 * application has no object of that kind bound (NULL passed to a setter). */
79	void* default_input_layout;
80	void* default_rasterizer;
81	void* default_depth_stencil;
82	void* default_blend;
83	void* default_sampler;
84	void* default_shaders[D3D11_STAGES];
85
86	// derived state
	/* Gallium-side mirrors of the bindings above, in the form that is passed to
	 * the pipe context. The num_* counters track how many leading slots of the
	 * corresponding array are passed to the driver. */
87	int primitive_mode;
88	struct pipe_vertex_buffer vertex_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
89	struct pipe_stream_output_target* so_targets[D3D11_SO_BUFFER_SLOT_COUNT];
90	struct pipe_sampler_view* sampler_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
91	void* sampler_csos[D3D11_STAGES][D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
92	unsigned num_shader_resource_views[D3D11_STAGES];
93	unsigned num_samplers[D3D11_STAGES];
94	unsigned num_vertex_buffers;
95	unsigned num_render_target_views;
96	unsigned num_viewports;
97	unsigned num_scissor_rects;
98	unsigned num_so_targets;
99
	/* The Gallium context all state changes and draws are issued on. */
100	struct pipe_context* pipe;
	/* Dirty mask of UPDATE_* bits; flushed by update_state() before a draw. */
101	unsigned update_flags;
102
	/* When true, destroy_context() also destroys the pipe context. */
103	bool owns_pipe;
	/* Value reported by GetContextFlags(). */
104	unsigned context_flags;
105
	/* Capabilities copied from the screen and then trimmed by init_context(). */
106	GalliumD3D11Caps caps;
107
	/* Helper objects created in init_context() and destroyed in destroy_context(). */
108	cso_context* cso_ctx;
109	gen_mipmap_state* gen_mipmap;
110
111#if API >= 11
/* D3D11 build: SYNCHRONIZED expands to an empty statement. */
112#define SYNCHRONIZED do {} while(0)
113
	/* Stores the pipe context, ownership flag and context flags, copies the
	 * capabilities from the device (screen) and then runs init_context(). */
114	GalliumD3D11DeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, bool owns_pipe, unsigned context_flags = 0)
115	: GalliumD3D11DeviceChild<ID3D11DeviceContext>(device), pipe(pipe), owns_pipe(owns_pipe), context_flags(context_flags)
116	{
117		caps = device->screen_caps;
118		init_context();
119	}
120
	/* Releases everything init_context() created via destroy_context(). */
121	~GalliumD3D11DeviceContext()
122	{
123		destroy_context();
124	}
125#else
/* D3D10 build: SYNCHRONIZED declares a scoped lock object on this->mutex;
 * maybe_mutex_t is parameterized on the `threadsafe` template argument. */
126#define SYNCHRONIZED lock_t<maybe_mutex_t<threadsafe> > lock_(this->mutex)
127
	/* In D3D10 the device is also the context: forwards the screen arguments
	 * to the screen base class, stores the pipe context with context_flags
	 * fixed at 0, copies the capabilities from the base class and runs
	 * init_context(). */
128	GalliumD3D10Device(pipe_screen* screen, pipe_context* pipe, bool owns_pipe, unsigned creation_flags, IDXGIAdapter* adapter)
129	: GalliumD3D10ScreenImpl<threadsafe>(screen, pipe, owns_pipe, creation_flags, adapter), pipe(pipe), owns_pipe(owns_pipe), context_flags(0)
130	{
131		caps = this->screen_caps;
132		init_context();
133	}
134
	/* Releases everything init_context() created via destroy_context(). */
135	~GalliumD3D10Device()
136	{
137		destroy_context();
138	}
139#endif
140
141	void init_context()
142	{
143		if(!pipe->begin_query)
144			caps.queries = false;
145		if(!pipe->bind_gs_state)
146		{
147			caps.gs = false;
148			caps.stages = 2;
149		}
150		assert(!caps.so || pipe->set_stream_output_targets);
151		if(!pipe->set_geometry_sampler_views)
152			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_GEOMETRY);
153		if(!pipe->set_fragment_sampler_views)
154			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_FRAGMENT);
155		if(!pipe->set_vertex_sampler_views)
156			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_VERTEX);
157
158		update_flags = 0;
159
160		// pipeline state
161		memset(viewports, 0, sizeof(viewports));
162		memset(scissor_rects, 0, sizeof(scissor_rects));
163		primitive_topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
164		index_format = DXGI_FORMAT_UNKNOWN;
165		index_offset = 0;
166		strip_cut_index = 0xffffffff;
167		render_predicate_value = 0;
168		memset(blend_color, 0, sizeof(blend_color));
169		sample_mask = ~0;
170		stencil_ref = 0;
171
172		// derived state
173		primitive_mode = 0;
174		memset(vertex_buffers, 0, sizeof(vertex_buffers));
175		memset(so_targets, 0, sizeof(so_buffers));
176		memset(sampler_views, 0, sizeof(sampler_views));
177		memset(sampler_csos, 0, sizeof(sampler_csos));
178		memset(num_shader_resource_views, 0, sizeof(num_shader_resource_views));
179		memset(num_samplers, 0, sizeof(num_samplers));
180		num_vertex_buffers = 0;
181		num_render_target_views = 0;
182		num_viewports = 0;
183		num_scissor_rects = 0;
184		num_so_targets = 0;
185
186		default_input_layout = pipe->create_vertex_elements_state(pipe, 0, 0);
187
188		struct pipe_rasterizer_state rasterizerd;
189		memset(&rasterizerd, 0, sizeof(rasterizerd));
190		rasterizerd.gl_rasterization_rules = 1;
191		rasterizerd.cull_face = PIPE_FACE_BACK;
192		rasterizerd.flatshade_first = 1;
193		rasterizerd.line_width = 1.0f;
194		rasterizerd.point_size = 1.0f;
195		rasterizerd.depth_clip = TRUE;
196		default_rasterizer = pipe->create_rasterizer_state(pipe, &rasterizerd);
197
198		struct pipe_depth_stencil_alpha_state depth_stencild;
199		memset(&depth_stencild, 0, sizeof(depth_stencild));
200		depth_stencild.depth.enabled = TRUE;
201		depth_stencild.depth.writemask = 1;
202		depth_stencild.depth.func = PIPE_FUNC_LESS;
203		default_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &depth_stencild);
204
205		struct pipe_blend_state blendd;
206		memset(&blendd, 0, sizeof(blendd));
207		blendd.rt[0].colormask = 0xf;
208		default_blend = pipe->create_blend_state(pipe, &blendd);
209
210		struct pipe_sampler_state samplerd;
211		memset(&samplerd, 0, sizeof(samplerd));
212		samplerd.normalized_coords = 1;
213		samplerd.min_img_filter = PIPE_TEX_FILTER_LINEAR;
214		samplerd.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
215		samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR;
216		samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
217		samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
218		samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
219		samplerd.border_color.f[0] = 1.0f;
220		samplerd.border_color.f[1] = 1.0f;
221		samplerd.border_color.f[2] = 1.0f;
222		samplerd.border_color.f[3] = 1.0f;
223		samplerd.min_lod = -FLT_MAX;
224		samplerd.max_lod = FLT_MAX;
225		samplerd.max_anisotropy = 1;
226		default_sampler = pipe->create_sampler_state(pipe, &samplerd);
227
228		memset(&samplerd, 0, sizeof(samplerd));
229		samplerd.normalized_coords = 0;
230		samplerd.min_img_filter = PIPE_TEX_FILTER_NEAREST;
231		samplerd.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
232		samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
233		samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
234		samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
235		samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
236		samplerd.min_lod = -FLT_MAX;
237		samplerd.max_lod = FLT_MAX;
238		samplerd.max_anisotropy = 1;
239
240		for(unsigned s = 0; s < D3D11_STAGES; ++s)
241			for(unsigned i = 0; i < D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; ++i)
242				sampler_csos[s][i] = default_sampler;
243
244		// TODO: should this really be empty shaders, or should they be all-passthrough?
245		memset(default_shaders, 0, sizeof(default_shaders));
246		struct ureg_program *ureg;
247		ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
248		ureg_END(ureg);
249		default_shaders[PIPE_SHADER_FRAGMENT] = ureg_create_shader_and_destroy(ureg, pipe);
250
251		ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
252		ureg_END(ureg);
253		default_shaders[PIPE_SHADER_VERTEX] = ureg_create_shader_and_destroy(ureg, pipe);
254
255		cso_ctx = cso_create_context(pipe);
256		gen_mipmap = util_create_gen_mipmap(pipe, cso_ctx);
257
258		RestoreGalliumState();
259	}
260
261	void destroy_context()
262	{
263		util_destroy_gen_mipmap(gen_mipmap);
264		cso_destroy_context(cso_ctx);
265
266		pipe->bind_vertex_elements_state(pipe, 0);
267		pipe->delete_vertex_elements_state(pipe, default_input_layout);
268
269		pipe->bind_rasterizer_state(pipe, 0);
270		pipe->delete_rasterizer_state(pipe, default_rasterizer);
271
272		pipe->bind_depth_stencil_alpha_state(pipe, 0);
273		pipe->delete_depth_stencil_alpha_state(pipe, default_depth_stencil);
274
275		pipe->bind_blend_state(pipe, 0);
276		pipe->delete_blend_state(pipe, default_blend);
277
278		pipe->bind_fragment_sampler_states(pipe, 0, 0);
279		pipe->bind_vertex_sampler_states(pipe, 0, 0);
280		if(pipe->bind_geometry_sampler_states)
281			pipe->bind_geometry_sampler_states(pipe, 0, 0);
282		pipe->delete_sampler_state(pipe, default_sampler);
283
284		pipe->bind_fs_state(pipe, 0);
285		pipe->delete_fs_state(pipe, default_shaders[PIPE_SHADER_FRAGMENT]);
286
287		pipe->bind_vs_state(pipe, 0);
288		pipe->delete_vs_state(pipe, default_shaders[PIPE_SHADER_VERTEX]);
289
290		if(owns_pipe)
291			pipe->destroy(pipe);
292	}
293
294	virtual unsigned STDMETHODCALLTYPE GetContextFlags(void)
295	{
296		return context_flags;
297	}
298#if API >= 11
299#define SET_SHADER_EXTRA_ARGS , \
300	ID3D11ClassInstance *const *ppClassInstances, \
301	unsigned count
302#define GET_SHADER_EXTRA_ARGS , \
303		ID3D11ClassInstance **ppClassInstances, \
304		unsigned *out_count
305#else
306#define SET_SHADER_EXTRA_ARGS
307#define GET_SHADER_EXTRA_ARGS
308#endif
309
310/* On Windows D3D11, SetConstantBuffers and SetShaderResources crash if passed a null pointer.
311 * Instead, you have to pass a pointer to nulls to unbind things.
312 * We do the same.
313 * TODO: is D3D10 the same?
314 */
315	template<unsigned s>
316	void xs_set_shader(GalliumD3D11Shader<>* shader)
317	{
318		if(shader != shaders[s].p)
319		{
320			shaders[s] = shader;
321			void* shader_cso = shader ? shader->object : default_shaders[s];
322			switch(s)
323			{
324			case PIPE_SHADER_VERTEX:
325				pipe->bind_vs_state(pipe, shader_cso);
326				break;
327			case PIPE_SHADER_FRAGMENT:
328				pipe->bind_fs_state(pipe, shader_cso);
329				break;
330			case PIPE_SHADER_GEOMETRY:
331				pipe->bind_gs_state(pipe, shader_cso);
332				break;
333			}
334			update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s)) | (1 << (UPDATE_VIEWS_SHIFT + s));
335		}
336	}
337
338	template<unsigned s>
339	void xs_set_constant_buffers(unsigned start, unsigned count, GalliumD3D11Buffer *const *constbufs)
340	{
341		for(unsigned i = 0; i < count; ++i)
342		{
343			if(constbufs[i] != constant_buffers[s][start + i].p)
344			{
345				constant_buffers[s][start + i] = constbufs[i];
346				if(s < caps.stages && start + i < caps.constant_buffers[s])
347					pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL);
348			}
349		}
350	}
351
352	template<unsigned s>
353	void xs_set_shader_resources(unsigned start, unsigned count, GalliumD3D11ShaderResourceView *const *srvs)
354	{
355		int last_different = -1;
356		for(unsigned i = 0; i < count; ++i)
357		{
358			if(shader_resource_views[s][start + i].p != srvs[i])
359			{
360				shader_resource_views[s][start + i] = srvs[i];
361				sampler_views[s][start + i] = srvs[i] ? srvs[i]->object : 0;
362				last_different = i;
363			}
364		}
365		if(last_different >= 0)
366		{
367			num_shader_resource_views[s] = std::max(num_shader_resource_views[s], start + last_different + 1);
368			update_flags |= 1 << (UPDATE_VIEWS_SHIFT + s);
369		}
370	}
371
372	template<unsigned s>
373	void xs_set_samplers(unsigned start, unsigned count, GalliumD3D11SamplerState *const *samps)
374	{
375		int last_different = -1;
376		for(unsigned i = 0; i < count; ++i)
377		{
378			if(samplers[s][start + i].p != samps[i])
379			{
380				samplers[s][start + i] = samps[i];
381				sampler_csos[s][start + i] = samps[i] ? samps[i]->object : default_sampler;
382				last_different = i;
383			}
384		}
385		if(last_different >= 0)
386		{
387			num_samplers[s] = std::max(num_samplers[s], start + last_different + 1);
388			update_flags |= 1 << (UPDATE_SAMPLERS_SHIFT + s);
389		}
390	}
391
/* Generates the eight per-stage entry points (Set/Get for Shader,
 * ConstantBuffers, ShaderResources and Samplers) of one shader stage.
 * XS is the stage prefix (VS, GS, PS, ...) and Stage the name used in the
 * interface type (Vertex, Geometry, Pixel, ...).
 * Every entry point starts with SYNCHRONIZED. Setters cast to the Gallium
 * implementation types and forward to the xs_set_* templates; getters hand
 * out the current bindings through ref().
 * No comments inside the macro body: a line comment would swallow the
 * line continuation.
 */
#define IMPLEMENT_SHADER_STAGE(XS, Stage) \
	virtual void STDMETHODCALLTYPE XS##SetShader( \
		ID3D11##Stage##Shader *pShader \
		SET_SHADER_EXTRA_ARGS) \
	{ \
		SYNCHRONIZED; \
		xs_set_shader<D3D11_STAGE_##XS>((GalliumD3D11Shader<>*)pShader); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetShader( \
		ID3D11##Stage##Shader **ppShader \
		GET_SHADER_EXTRA_ARGS) \
	{ \
		SYNCHRONIZED; \
		*ppShader = (ID3D11##Stage##Shader*)shaders[D3D11_STAGE_##XS].ref(); \
	} \
	virtual void STDMETHODCALLTYPE XS##SetConstantBuffers( \
		unsigned start, \
		unsigned count, \
		ID3D11Buffer *const* constant_buffers) \
	{ \
		SYNCHRONIZED; \
		xs_set_constant_buffers<D3D11_STAGE_##XS>(start, count, (GalliumD3D11Buffer *const *)constant_buffers); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetConstantBuffers( \
		unsigned start, \
		unsigned count, \
		ID3D11Buffer **out_constant_buffers) \
	{ \
		SYNCHRONIZED; \
		for(unsigned n = 0; n < count; ++n) \
		{ \
			out_constant_buffers[n] = constant_buffers[D3D11_STAGE_##XS][start + n].ref(); \
		} \
	} \
	virtual void STDMETHODCALLTYPE XS##SetShaderResources( \
		unsigned start, \
		unsigned count, \
		ID3D11ShaderResourceView *const *new_shader_resource_views) \
	{ \
		SYNCHRONIZED; \
		xs_set_shader_resources<D3D11_STAGE_##XS>(start, count, (GalliumD3D11ShaderResourceView *const *)new_shader_resource_views); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetShaderResources( \
		unsigned start, \
		unsigned count, \
		ID3D11ShaderResourceView **out_shader_resource_views) \
	{ \
		SYNCHRONIZED; \
		for(unsigned n = 0; n < count; ++n) \
		{ \
			out_shader_resource_views[n] = shader_resource_views[D3D11_STAGE_##XS][start + n].ref(); \
		} \
	} \
	virtual void STDMETHODCALLTYPE XS##SetSamplers( \
		unsigned start, \
		unsigned count, \
		ID3D11SamplerState *const *new_samplers) \
	{ \
		SYNCHRONIZED; \
		xs_set_samplers<D3D11_STAGE_##XS>(start, count, (GalliumD3D11SamplerState *const *)new_samplers); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetSamplers( \
		unsigned start, \
		unsigned count, \
		ID3D11SamplerState **out_samplers) \
	{ \
		SYNCHRONIZED; \
		for(unsigned n = 0; n < count; ++n) \
		{ \
			out_samplers[n] = samplers[D3D11_STAGE_##XS][start + n].ref(); \
		} \
	}
458
459#define DO_VS(x) x
460#define DO_GS(x) do {if(caps.gs) {x;}} while(0)
461#define DO_PS(x) x
462#define DO_HS(x)
463#define DO_DS(x)
464#define DO_CS(x)
465	IMPLEMENT_SHADER_STAGE(VS, Vertex)
466	IMPLEMENT_SHADER_STAGE(GS, Geometry)
467	IMPLEMENT_SHADER_STAGE(PS, Pixel)
468
469#if API >= 11
470	IMPLEMENT_SHADER_STAGE(HS, Hull)
471	IMPLEMENT_SHADER_STAGE(DS, Domain)
472	IMPLEMENT_SHADER_STAGE(CS, Compute)
473
474	virtual void STDMETHODCALLTYPE CSSetUnorderedAccessViews(
475		unsigned start,
476		unsigned count,
477		ID3D11UnorderedAccessView *const *new_unordered_access_views,
478		const unsigned *new_uav_initial_counts)
479	{
480		SYNCHRONIZED;
481		for(unsigned i = 0; i < count; ++i)
482			cs_unordered_access_views[start + i] = new_unordered_access_views[i];
483	}
484
485	virtual void STDMETHODCALLTYPE CSGetUnorderedAccessViews(
486		unsigned start,
487		unsigned count,
488		ID3D11UnorderedAccessView **out_unordered_access_views)
489	{
490		SYNCHRONIZED;
491		for(unsigned i = 0; i < count; ++i)
492			out_unordered_access_views[i] = cs_unordered_access_views[start + i].ref();
493	}
494#endif
495
496	template<unsigned s>
497	void update_stage()
498	{
499		if(update_flags & (1 << (UPDATE_VIEWS_SHIFT + s)))
500		{
501			while(num_shader_resource_views[s] && !sampler_views[s][num_shader_resource_views[s] - 1]) \
502				--num_shader_resource_views[s];
503			if((1 << s) & caps.stages_with_sampling)
504			{
505				const unsigned num_views_to_bind = num_shader_resource_views[s];
506				switch(s)
507				{
508				case PIPE_SHADER_VERTEX:
509					pipe->set_vertex_sampler_views(pipe, num_views_to_bind, sampler_views[s]);
510					break;
511				case PIPE_SHADER_FRAGMENT:
512					pipe->set_fragment_sampler_views(pipe, num_views_to_bind, sampler_views[s]);
513					break;
514				case PIPE_SHADER_GEOMETRY:
515					pipe->set_geometry_sampler_views(pipe, num_views_to_bind, sampler_views[s]);
516					break;
517				}
518			}
519		}
520
521		if(update_flags & (1 << (UPDATE_SAMPLERS_SHIFT + s)))
522		{
523			while(num_samplers[s] && !sampler_csos[s][num_samplers[s] - 1])
524				--num_samplers[s];
525			if((1 << s) & caps.stages_with_sampling)
526			{
527				const unsigned num_samplers_to_bind = num_samplers[s];
528				switch(s)
529				{
530				case PIPE_SHADER_VERTEX:
531					pipe->bind_vertex_sampler_states(pipe, num_samplers_to_bind, sampler_csos[s]);
532					break;
533				case PIPE_SHADER_FRAGMENT:
534					pipe->bind_fragment_sampler_states(pipe, num_samplers_to_bind, sampler_csos[s]);
535					break;
536				case PIPE_SHADER_GEOMETRY:
537					pipe->bind_geometry_sampler_states(pipe, num_samplers_to_bind, sampler_csos[s]);
538					break;
539				}
540			}
541		}
542	}
543
544	void update_state()
545	{
546		update_stage<D3D11_STAGE_PS>();
547		update_stage<D3D11_STAGE_VS>();
548		update_stage<D3D11_STAGE_GS>();
549#if API >= 11
550		update_stage<D3D11_STAGE_HS>();
551		update_stage<D3D11_STAGE_DS>();
552		update_stage<D3D11_STAGE_CS>();
553#endif
554
555		if(update_flags & UPDATE_VERTEX_BUFFERS)
556		{
557			while(num_vertex_buffers && !vertex_buffers[num_vertex_buffers - 1].buffer)
558				--num_vertex_buffers;
559			pipe->set_vertex_buffers(pipe, num_vertex_buffers, vertex_buffers);
560		}
561
562		update_flags = 0;
563	}
564
565	virtual void STDMETHODCALLTYPE IASetInputLayout(
566		ID3D11InputLayout *new_input_layout)
567	{
568		SYNCHRONIZED;
569		if(new_input_layout != input_layout.p)
570		{
571			input_layout = new_input_layout;
572			pipe->bind_vertex_elements_state(pipe, new_input_layout ? ((GalliumD3D11InputLayout*)new_input_layout)->object : default_input_layout);
573		}
574	}
575
576	virtual void STDMETHODCALLTYPE IAGetInputLayout(
577		ID3D11InputLayout **out_input_layout)
578	{
579		SYNCHRONIZED;
580		*out_input_layout = input_layout.ref();
581	}
582
583	virtual void STDMETHODCALLTYPE IASetVertexBuffers(
584		unsigned start,
585		unsigned count,
586		ID3D11Buffer *const *new_vertex_buffers,
587		const unsigned *new_strides,
588		const unsigned *new_offsets)
589	{
590		SYNCHRONIZED;
591		int last_different = -1;
592		for(unsigned i = 0; i < count; ++i)
593		{
594			ID3D11Buffer* buffer = new_vertex_buffers[i];
595			if(buffer != input_buffers[start + i].p
596				|| vertex_buffers[start + i].buffer_offset != new_offsets[i]
597				|| vertex_buffers[start + i].stride != new_strides[i]
598			)
599			{
600				input_buffers[start + i] = buffer;
601				vertex_buffers[start + i].buffer = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
602				vertex_buffers[start + i].buffer_offset = new_offsets[i];
603				vertex_buffers[start + i].stride = new_strides[i];
604				last_different = i;
605			}
606		}
607		if(last_different >= 0)
608		{
609			num_vertex_buffers = std::max(num_vertex_buffers, start + count);
610			update_flags |= UPDATE_VERTEX_BUFFERS;
611		}
612	}
613
614	virtual void STDMETHODCALLTYPE IAGetVertexBuffers(
615		unsigned start,
616		unsigned count,
617		ID3D11Buffer **out_vertex_buffers,
618		unsigned *out_strides,
619		unsigned *out_offsets)
620	{
621		SYNCHRONIZED;
622		if(out_vertex_buffers)
623		{
624			for(unsigned i = 0; i < count; ++i)
625				out_vertex_buffers[i] = input_buffers[start + i].ref();
626		}
627
628		if(out_offsets)
629		{
630			for(unsigned i = 0; i < count; ++i)
631				out_offsets[i] = vertex_buffers[start + i].buffer_offset;
632		}
633
634		if(out_strides)
635		{
636			for(unsigned i = 0; i < count; ++i)
637				out_strides[i] = vertex_buffers[start + i].stride;
638		}
639	}
640
641	void set_index_buffer()
642	{
643		pipe_index_buffer ib;
644		if(!index_buffer)
645		{
646			memset(&ib, 0, sizeof(ib));
647		}
648		else
649		{
650			switch(index_format) {
651			case DXGI_FORMAT_R32_UINT:
652				ib.index_size = 4;
653				strip_cut_index = 0xffffffff;
654				break;
655			case DXGI_FORMAT_R16_UINT:
656				ib.index_size = 2;
657				strip_cut_index = 0xffff;
658				break;
659			default:
660				ib.index_size = 1;
661				strip_cut_index = 0xff;
662				break;
663			}
664			ib.offset = index_offset;
665			ib.buffer = index_buffer ? ((GalliumD3D11Buffer*)index_buffer.p)->resource : 0;
666		}
667		pipe->set_index_buffer(pipe, &ib);
668	}
669
670	virtual void STDMETHODCALLTYPE IASetIndexBuffer(
671		ID3D11Buffer *new_index_buffer,
672		DXGI_FORMAT new_index_format,
673		unsigned new_index_offset)
674	{
675		SYNCHRONIZED;
676		if(index_buffer.p != new_index_buffer || index_format != new_index_format || index_offset != new_index_offset)
677		{
678			index_buffer = new_index_buffer;
679			index_format = new_index_format;
680			index_offset = new_index_offset;
681
682			set_index_buffer();
683		}
684	}
685
686	virtual void STDMETHODCALLTYPE IAGetIndexBuffer(
687		ID3D11Buffer **out_index_buffer,
688		DXGI_FORMAT *out_index_format,
689		unsigned *out_index_offset)
690	{
691		SYNCHRONIZED;
692		if(out_index_buffer)
693			*out_index_buffer = index_buffer.ref();
694		if(out_index_format)
695			*out_index_format = index_format;
696		if(out_index_offset)
697			*out_index_offset = index_offset;
698	}
699
700	virtual void STDMETHODCALLTYPE IASetPrimitiveTopology(
701		D3D11_PRIMITIVE_TOPOLOGY new_primitive_topology)
702	{
703		SYNCHRONIZED;
704		if(primitive_topology != new_primitive_topology)
705		{
706			if(new_primitive_topology < D3D_PRIMITIVE_TOPOLOGY_COUNT)
707				primitive_mode = d3d_to_pipe_prim[new_primitive_topology];
708			else
709				primitive_mode = 0;
710			primitive_topology = new_primitive_topology;
711		}
712	}
713
714	virtual void STDMETHODCALLTYPE IAGetPrimitiveTopology(
715		D3D11_PRIMITIVE_TOPOLOGY *out_primitive_topology)
716	{
717		SYNCHRONIZED;
718		*out_primitive_topology = primitive_topology;
719	}
720
721	virtual void STDMETHODCALLTYPE DrawIndexed(
722		unsigned index_count,
723		unsigned start_index_location,
724		int base_vertex_location)
725	{
726		SYNCHRONIZED;
727		if(update_flags)
728			update_state();
729
730		pipe_draw_info info;
731		info.mode = primitive_mode;
732		info.indexed = TRUE;
733		info.count = index_count;
734		info.start = start_index_location;
735		info.index_bias = base_vertex_location;
736		info.min_index = 0;
737		info.max_index = ~0;
738		info.start_instance = 0;
739		info.instance_count = 1;
740		info.primitive_restart = TRUE;
741		info.restart_index = strip_cut_index;
742		info.count_from_stream_output = NULL;
743
744		pipe->draw_vbo(pipe, &info);
745	}
746
747	virtual void STDMETHODCALLTYPE Draw(
748		unsigned vertex_count,
749		unsigned start_vertex_location)
750	{
751		SYNCHRONIZED;
752		if(update_flags)
753			update_state();
754
755		pipe_draw_info info;
756		info.mode = primitive_mode;
757		info.indexed = FALSE;
758		info.count = vertex_count;
759		info.start = start_vertex_location;
760		info.index_bias = 0;
761		info.min_index = 0;
762		info.max_index = ~0;
763		info.start_instance = 0;
764		info.instance_count = 1;
765		info.primitive_restart = FALSE;
766		info.count_from_stream_output = NULL;
767
768		pipe->draw_vbo(pipe, &info);
769	}
770
771	virtual void STDMETHODCALLTYPE DrawIndexedInstanced(
772		unsigned index_countPerInstance,
773		unsigned instance_count,
774		unsigned start_index_location,
775		int base_vertex_location,
776		unsigned start_instance_location)
777	{
778		SYNCHRONIZED;
779		if(update_flags)
780			update_state();
781
782		pipe_draw_info info;
783		info.mode = primitive_mode;
784		info.indexed = TRUE;
785		info.count = index_countPerInstance;
786		info.start = start_index_location;
787		info.index_bias = base_vertex_location;
788		info.min_index = 0;
789		info.max_index = ~0;
790		info.start_instance = start_instance_location;
791		info.instance_count = instance_count;
792		info.primitive_restart = TRUE;
793		info.restart_index = strip_cut_index;
794		info.count_from_stream_output = NULL;
795
796		pipe->draw_vbo(pipe, &info);
797	}
798
799	virtual void STDMETHODCALLTYPE DrawInstanced(
800		unsigned vertex_countPerInstance,
801		unsigned instance_count,
802		unsigned start_vertex_location,
803		unsigned start_instance_location)
804	{
805		SYNCHRONIZED;
806		if(update_flags)
807			update_state();
808
809		pipe_draw_info info;
810		info.mode = primitive_mode;
811		info.indexed = FALSE;
812		info.count = vertex_countPerInstance;
813		info.start = start_vertex_location;
814		info.index_bias = 0;
815		info.min_index = 0;
816		info.max_index = ~0;
817		info.start_instance = start_instance_location;
818		info.instance_count = instance_count;
819		info.primitive_restart = FALSE;
820		info.count_from_stream_output = NULL;
821
822		pipe->draw_vbo(pipe, &info);
823	}
824
825	virtual void STDMETHODCALLTYPE DrawAuto(void)
826	{
827		if(!caps.so)
828			return;
829
830		SYNCHRONIZED;
831		if(update_flags)
832			update_state();
833
834		pipe_draw_info info;
835		info.mode = primitive_mode;
836		info.indexed = FALSE;
837		info.count = 0;
838		info.start = 0;
839		info.index_bias = 0;
840		info.min_index = 0;
841		info.max_index = ~0;
842		info.start_instance = 0;
843		info.instance_count = 1;
844		info.primitive_restart = FALSE;
845		info.restart_index = 0;
846		info.count_from_stream_output = input_buffers[0].p->so_target;
847
848		pipe->draw_vbo(pipe, &info);
849	}
850
851	virtual void STDMETHODCALLTYPE DrawIndexedInstancedIndirect(
852		ID3D11Buffer *buffer,
853		unsigned aligned_byte_offset)
854	{
855		SYNCHRONIZED;
856		if(update_flags)
857			update_state();
858
859		struct {
860			unsigned count;
861			unsigned instance_count;
862			unsigned start;
863			unsigned index_bias;
864		} data;
865
866		pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data);
867
868		pipe_draw_info info;
869		info.mode = primitive_mode;
870		info.indexed = TRUE;
871		info.start = data.start;
872		info.count = data.count;
873		info.index_bias = data.index_bias;
874		info.min_index = 0;
875		info.max_index = ~0;
876		info.start_instance = 0;
877		info.instance_count = data.instance_count;
878		info.primitive_restart = TRUE;
879		info.restart_index = strip_cut_index;
880		info.count_from_stream_output = NULL;
881
882		pipe->draw_vbo(pipe, &info);
883	}
884
885	virtual void STDMETHODCALLTYPE DrawInstancedIndirect(
886		ID3D11Buffer *buffer,
887		unsigned aligned_byte_offset)
888	{
889		SYNCHRONIZED;
890		if(update_flags)
891			update_state();
892
893		struct {
894			unsigned count;
895			unsigned instance_count;
896			unsigned start;
897		} data;
898
899		pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data);
900
901		pipe_draw_info info;
902		info.mode = primitive_mode;
903		info.indexed = FALSE;
904		info.start = data.start;
905		info.count = data.count;
906		info.index_bias = 0;
907		info.min_index = 0;
908		info.max_index = ~0;
909		info.start_instance = 0;
910		info.instance_count = data.instance_count;
911		info.primitive_restart = FALSE;
912		info.count_from_stream_output = NULL;
913
914		pipe->draw_vbo(pipe, &info);
915	}
916
917#if API >= 11
918	virtual void STDMETHODCALLTYPE Dispatch(
919		unsigned thread_group_count_x,
920		unsigned thread_group_count_y,
921		unsigned thread_group_count_z)
922	{
923// uncomment this when this is implemented
924//		SYNCHRONIZED;
925//		if(update_flags)
926//			update_state();
927	}
928
929	virtual void STDMETHODCALLTYPE DispatchIndirect(
930		ID3D11Buffer *buffer,
931		unsigned aligned_byte_offset)
932	{
933// uncomment this when this is implemented
934//		SYNCHRONIZED;
935//		if(update_flags)
936//			update_state();
937	}
938#endif
939
940	virtual void STDMETHODCALLTYPE RSSetState(
941		ID3D11RasterizerState *new_rasterizer_state)
942	{
943		SYNCHRONIZED;
944		if(new_rasterizer_state != rasterizer_state.p)
945		{
946			rasterizer_state = new_rasterizer_state;
947			pipe->bind_rasterizer_state(pipe, new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->object : default_rasterizer);
948		}
949	}
950
951	virtual void STDMETHODCALLTYPE RSGetState(
952		ID3D11RasterizerState **out_rasterizer_state)
953	{
954		SYNCHRONIZED;
955		*out_rasterizer_state = rasterizer_state.ref();
956	}
957
958	void set_viewport()
959	{
960		// TODO: is depth correct? it seems D3D10/11 uses a [-1,1]x[-1,1]x[0,1] cube
961		pipe_viewport_state viewport;
962		float half_width = viewports[0].Width * 0.5f;
963		float half_height = viewports[0].Height * 0.5f;
964
965		viewport.scale[0] = half_width;
966		viewport.scale[1] = -half_height;
967		viewport.scale[2] = (viewports[0].MaxDepth - viewports[0].MinDepth);
968		viewport.scale[3] = 1.0f;
969		viewport.translate[0] = half_width + viewports[0].TopLeftX;
970		viewport.translate[1] = half_height + viewports[0].TopLeftY;
971		viewport.translate[2] = viewports[0].MinDepth;
972		viewport.translate[3] = 1.0f;
973		pipe->set_viewport_state(pipe, &viewport);
974	}
975
976	virtual void STDMETHODCALLTYPE RSSetViewports(
977		unsigned count,
978		const D3D11_VIEWPORT *new_viewports)
979	{
980		SYNCHRONIZED;
981		if(count)
982		{
983			if(memcmp(&viewports[0], &new_viewports[0], sizeof(viewports[0])))
984			{
985				viewports[0] = new_viewports[0];
986				set_viewport();
987			}
988			for(unsigned i = 1; i < count; ++i)
989				viewports[i] = new_viewports[i];
990		}
991		else if(num_viewports)
992		{
993			// TODO: what should we do here?
994			memset(&viewports[0], 0, sizeof(viewports[0]));
995			set_viewport();
996		}
997		num_viewports = count;
998	}
999
1000	virtual void STDMETHODCALLTYPE RSGetViewports(
1001		unsigned *out_count,
1002		D3D11_VIEWPORT *out_viewports)
1003	{
1004		SYNCHRONIZED;
1005		if(out_viewports)
1006		{
1007			unsigned i;
1008			for(i = 0; i < std::min(*out_count, num_viewports); ++i)
1009				out_viewports[i] = viewports[i];
1010
1011			memset(out_viewports + i, 0, (*out_count - i) * sizeof(D3D11_VIEWPORT));
1012		}
1013
1014		*out_count = num_viewports;
1015	}
1016
1017	void set_scissor()
1018	{
1019		pipe_scissor_state scissor;
1020		scissor.minx = scissor_rects[0].left;
1021		scissor.miny = scissor_rects[0].top;
1022		scissor.maxx = scissor_rects[0].right;
1023		scissor.maxy = scissor_rects[0].bottom;
1024		pipe->set_scissor_state(pipe, &scissor);
1025	}
1026
1027	virtual void STDMETHODCALLTYPE RSSetScissorRects(
1028		unsigned count,
1029		const D3D11_RECT *new_rects)
1030	{
1031		SYNCHRONIZED;
1032		if(count)
1033		{
1034			if(memcmp(&scissor_rects[0], &new_rects[0], sizeof(scissor_rects[0])))
1035			{
1036				scissor_rects[0] = new_rects[0];
1037				set_scissor();
1038			}
1039			for(unsigned i = 1; i < count; ++i)
1040				scissor_rects[i] = new_rects[i];
1041		}
1042		else if(num_scissor_rects)
1043		{
1044			// TODO: what should we do here?
1045			memset(&scissor_rects[0], 0, sizeof(scissor_rects[0]));
1046			set_scissor();
1047		}
1048
1049		num_scissor_rects = count;
1050	}
1051
1052	virtual void STDMETHODCALLTYPE RSGetScissorRects(
1053		unsigned *out_count,
1054		D3D11_RECT *out_rects)
1055	{
1056		SYNCHRONIZED;
1057		if(out_rects)
1058		{
1059			unsigned i;
1060			for(i = 0; i < std::min(*out_count, num_scissor_rects); ++i)
1061				out_rects[i] = scissor_rects[i];
1062
1063			memset(out_rects + i, 0, (*out_count - i) * sizeof(D3D11_RECT));
1064		}
1065
1066		*out_count = num_scissor_rects;
1067	}
1068
1069	virtual void STDMETHODCALLTYPE OMSetBlendState(
1070		ID3D11BlendState *new_blend_state,
1071		const float new_blend_factor[4],
1072		unsigned new_sample_mask)
1073	{
1074		SYNCHRONIZED;
1075		float white[4] = {1.0f, 1.0f, 1.0f, 1.0f};
1076
1077		if(blend_state.p != new_blend_state)
1078		{
1079			pipe->bind_blend_state(pipe, new_blend_state ? ((GalliumD3D11BlendState*)new_blend_state)->object : default_blend);
1080			blend_state = new_blend_state;
1081		}
1082
1083		// Windows D3D11 does this, even though it's apparently undocumented
1084		if(!new_blend_factor)
1085			new_blend_factor = white;
1086
1087		if(memcmp(blend_color, new_blend_factor, sizeof(blend_color)))
1088		{
1089			pipe->set_blend_color(pipe, (struct pipe_blend_color*)new_blend_factor);
1090			memcpy(blend_color, new_blend_factor, sizeof(blend_color));
1091		}
1092
1093		if(sample_mask != new_sample_mask)
1094		{
1095			pipe->set_sample_mask(pipe, new_sample_mask);
1096			sample_mask = new_sample_mask;
1097		}
1098	}
1099
1100	virtual void STDMETHODCALLTYPE OMGetBlendState(
1101		ID3D11BlendState **out_blend_state,
1102		float out_blend_factor[4],
1103		unsigned *out_sample_mask)
1104	{
1105		SYNCHRONIZED;
1106		if(out_blend_state)
1107			*out_blend_state = blend_state.ref();
1108		if(out_blend_factor)
1109			memcpy(out_blend_factor, blend_color, sizeof(blend_color));
1110		if(out_sample_mask)
1111			*out_sample_mask = sample_mask;
1112	}
1113
1114	void set_stencil_ref()
1115	{
1116		struct pipe_stencil_ref sref;
1117		sref.ref_value[0] = stencil_ref;
1118		sref.ref_value[1] = stencil_ref;
1119		pipe->set_stencil_ref(pipe, &sref);
1120	}
1121
1122	virtual void STDMETHODCALLTYPE OMSetDepthStencilState(
1123		ID3D11DepthStencilState *new_depth_stencil_state,
1124		unsigned new_stencil_ref)
1125	{
1126		SYNCHRONIZED;
1127		if(new_depth_stencil_state != depth_stencil_state.p)
1128		{
1129			pipe->bind_depth_stencil_alpha_state(pipe, new_depth_stencil_state ? ((GalliumD3D11DepthStencilState*)new_depth_stencil_state)->object : default_depth_stencil);
1130			depth_stencil_state = new_depth_stencil_state;
1131		}
1132
1133		if(new_stencil_ref != stencil_ref)
1134		{
1135			stencil_ref = new_stencil_ref;
1136			set_stencil_ref();
1137		}
1138	}
1139
1140	virtual void STDMETHODCALLTYPE OMGetDepthStencilState(
1141		ID3D11DepthStencilState **out_depth_stencil_state,
1142		unsigned *out_stencil_ref)
1143	{
1144		SYNCHRONIZED;
1145		if(*out_depth_stencil_state)
1146			*out_depth_stencil_state = depth_stencil_state.ref();
1147		if(out_stencil_ref)
1148			*out_stencil_ref = stencil_ref;
1149	}
1150
1151	void set_framebuffer()
1152	{
1153		struct pipe_framebuffer_state fb;
1154		memset(&fb, 0, sizeof(fb));
1155		if(depth_stencil_view)
1156		{
1157			struct pipe_surface* surf = ((GalliumD3D11DepthStencilView*)depth_stencil_view.p)->object;
1158			fb.zsbuf = surf;
1159			if(surf->width > fb.width)
1160				fb.width = surf->width;
1161			if(surf->height > fb.height)
1162				fb.height = surf->height;
1163		}
1164		fb.nr_cbufs = num_render_target_views;
1165		unsigned i;
1166		for(i = 0; i < num_render_target_views; ++i)
1167		{
1168			if(render_target_views[i])
1169			{
1170				struct pipe_surface* surf = ((GalliumD3D11RenderTargetView*)render_target_views[i].p)->object;
1171				fb.cbufs[i] = surf;
1172				if(surf->width > fb.width)
1173					fb.width = surf->width;
1174				if(surf->height > fb.height)
1175					fb.height = surf->height;
1176			}
1177		}
1178
1179		pipe->set_framebuffer_state(pipe, &fb);
1180	}
1181
1182	/* TODO: the docs say that we should unbind conflicting resources (e.g. those bound for read while we are binding them for write too), but we aren't.
1183	 * Hopefully nobody relies on this happening
1184	 */
1185
1186	virtual void STDMETHODCALLTYPE OMSetRenderTargets(
1187		unsigned count,
1188		ID3D11RenderTargetView *const *new_render_target_views,
1189		ID3D11DepthStencilView  *new_depth_stencil_view)
1190	{
1191		SYNCHRONIZED;
1192
1193		bool update = false;
1194		unsigned i, num;
1195
1196		if(depth_stencil_view.p != new_depth_stencil_view) {
1197			update = true;
1198			depth_stencil_view = new_depth_stencil_view;
1199		}
1200
1201		if(!new_render_target_views)
1202			count = 0;
1203
1204		for(num = 0, i = 0; i < count; ++i) {
1205#if API >= 11
1206			// XXX: is unbinding the UAVs here correct ?
1207			om_unordered_access_views[i] = (ID3D11UnorderedAccessView*)NULL;
1208#endif
1209			if(new_render_target_views[i] != render_target_views[i].p) {
1210				update = true;
1211				render_target_views[i] = new_render_target_views[i];
1212			}
1213			if(new_render_target_views[i])
1214				num = i + 1;
1215		}
1216		if(num != num_render_target_views) {
1217			update = true;
1218			for(; i < num_render_target_views; ++i)
1219				render_target_views[i] = (ID3D11RenderTargetView*)NULL;
1220		}
1221		num_render_target_views = num;
1222		if(update)
1223			set_framebuffer();
1224	}
1225
1226	virtual void STDMETHODCALLTYPE OMGetRenderTargets(
1227		unsigned count,
1228		ID3D11RenderTargetView **out_render_target_views,
1229		ID3D11DepthStencilView  **out_depth_stencil_view)
1230	{
1231		SYNCHRONIZED;
1232		if(out_render_target_views)
1233		{
1234			unsigned i;
1235			for(i = 0; i < std::min(num_render_target_views, count); ++i)
1236				out_render_target_views[i] = render_target_views[i].ref();
1237
1238			for(; i < count; ++i)
1239				out_render_target_views[i] = 0;
1240		}
1241
1242		if(out_depth_stencil_view)
1243			*out_depth_stencil_view = depth_stencil_view.ref();
1244	}
1245
1246#if API >= 11
1247	/* TODO: what is this supposed to do _exactly_? are we doing the right thing? */
1248	virtual void STDMETHODCALLTYPE OMSetRenderTargetsAndUnorderedAccessViews(
1249		unsigned rtv_count,
1250		ID3D11RenderTargetView *const *new_render_target_views,
1251		ID3D11DepthStencilView  *new_depth_stencil_view,
1252		unsigned uav_start,
1253		unsigned uav_count,
1254		ID3D11UnorderedAccessView *const *new_unordered_access_views,
1255		const unsigned *new_uav_initial_counts)
1256	{
1257		SYNCHRONIZED;
1258		if(rtv_count != D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL)
1259			OMSetRenderTargets(rtv_count, new_render_target_views, new_depth_stencil_view);
1260
1261		if(uav_count != D3D11_KEEP_UNORDERED_ACCESS_VIEWS)
1262		{
1263			for(unsigned i = 0; i < uav_count; ++i)
1264			{
1265				om_unordered_access_views[uav_start + i] = new_unordered_access_views[i];
1266				render_target_views[uav_start + i] = (ID3D11RenderTargetView*)0;
1267			}
1268		}
1269	}
1270
1271	virtual void STDMETHODCALLTYPE OMGetRenderTargetsAndUnorderedAccessViews(
1272		unsigned rtv_count,
1273		ID3D11RenderTargetView **out_render_target_views,
1274		ID3D11DepthStencilView  **out_depth_stencil_view,
1275		unsigned uav_start,
1276		unsigned uav_count,
1277		ID3D11UnorderedAccessView **out_unordered_access_views)
1278	{
1279		SYNCHRONIZED;
1280		if(out_render_target_views)
1281			OMGetRenderTargets(rtv_count, out_render_target_views, out_depth_stencil_view);
1282
1283		if(out_unordered_access_views)
1284		{
1285			for(unsigned i = 0; i < uav_count; ++i)
1286				out_unordered_access_views[i] = om_unordered_access_views[uav_start + i].ref();
1287		}
1288	}
1289#endif
1290
1291	virtual void STDMETHODCALLTYPE SOSetTargets(
1292		unsigned count,
1293		ID3D11Buffer *const *new_so_targets,
1294		const unsigned *new_offsets)
1295	{
1296		SYNCHRONIZED;
1297
1298		unsigned new_count, i;
1299		bool changed = false;
1300
1301		uint32_t append_mask = 0xffffffff;
1302
1303		if(!new_so_targets)
1304			count = 0;
1305		for(new_count = 0, i = 0; i < count; ++i)
1306		{
1307			GalliumD3D11Buffer* buffer = static_cast<GalliumD3D11Buffer*>(new_so_targets[i]);
1308
1309			if(buffer != so_buffers[i].p)
1310			{
1311				changed = true;
1312				so_buffers[i] = buffer;
1313				so_targets[i] = buffer ? buffer->so_target : 0;
1314			}
1315			if(!buffer)
1316				continue;
1317			new_count = i + 1;
1318
1319			if(new_offsets[i] == (unsigned)-1)
1320			{
1321				assert(so_targets[i]);
1322				continue;
1323			}
1324			append_mask &= ~(1 << i);
1325
1326			if(!so_targets[i] || new_offsets[i] != so_targets[i]->buffer_offset)
1327			{
1328				pipe_so_target_reference(&buffer->so_target, NULL);
1329				buffer->so_target = pipe->create_stream_output_target(
1330					pipe, buffer->resource, new_offsets[i], buffer->resource->width0 - new_offsets[i]);
1331				so_targets[i] = buffer->so_target;
1332				changed = true;
1333			}
1334		}
1335		if(i < num_so_targets) {
1336			changed = true;
1337			for(; i < num_so_targets; ++i)
1338				so_buffers[i] = (GalliumD3D11Buffer*)0;
1339		}
1340		num_so_targets = new_count;
1341
1342		if(likely(caps.so) && (changed || append_mask != 0xffffffff))
1343			pipe->set_stream_output_targets(pipe, num_so_targets, so_targets, append_mask);
1344	}
1345
1346	virtual void STDMETHODCALLTYPE SOGetTargets(
1347		unsigned count,
1348		ID3D11Buffer **out_so_targets
1349#if API < 11
1350		, UINT *out_offsets
1351#endif
1352		)
1353	{
1354		SYNCHRONIZED;
1355		for(unsigned i = 0; i < count; ++i)
1356		{
1357			out_so_targets[i] = so_buffers[i].ref();
1358#if API < 11
1359			out_offsets[i] = so_targets[i]->buffer_offset;
1360#endif
1361		}
1362	}
1363
1364	virtual void STDMETHODCALLTYPE Begin(
1365		ID3D11Asynchronous *async)
1366	{
1367		SYNCHRONIZED;
1368		if(caps.queries)
1369			pipe->begin_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query);
1370	}
1371
1372	virtual void STDMETHODCALLTYPE End(
1373		ID3D11Asynchronous *async)
1374	{
1375		SYNCHRONIZED;
1376		if(caps.queries)
1377			pipe->end_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query);
1378	}
1379
1380	virtual HRESULT STDMETHODCALLTYPE GetData(
1381		ID3D11Asynchronous *iasync,
1382		void *out_data,
1383		unsigned data_size,
1384		unsigned get_data_flags)
1385	{
1386		SYNCHRONIZED;
1387		if(!caps.queries)
1388			return E_NOTIMPL;
1389
1390		GalliumD3D11Asynchronous<>* async = (GalliumD3D11Asynchronous<>*)iasync;
1391		void* tmp_data = alloca(async->data_size);
1392		memset(tmp_data, 0, async->data_size); // sizeof(BOOL) is 4, sizeof(boolean) is 1
1393		boolean ret = pipe->get_query_result(pipe, async->query, !(get_data_flags & D3D11_ASYNC_GETDATA_DONOTFLUSH), tmp_data);
1394		if(out_data)
1395      {
1396			memcpy(out_data, tmp_data, std::min(async->data_size, data_size));
1397      }
1398		return ret ? S_OK : S_FALSE;
1399	}
1400
1401	void set_render_condition()
1402	{
1403		if(caps.render_condition)
1404		{
1405			if(!render_predicate)
1406				pipe->render_condition(pipe, 0, 0);
1407			else
1408			{
1409				GalliumD3D11Predicate* predicate = (GalliumD3D11Predicate*)render_predicate.p;
1410				if(!render_predicate_value && predicate->desc.Query == D3D11_QUERY_OCCLUSION_PREDICATE)
1411				{
1412					unsigned mode = (predicate->desc.MiscFlags & D3D11_QUERY_MISC_PREDICATEHINT) ? PIPE_RENDER_COND_NO_WAIT : PIPE_RENDER_COND_WAIT;
1413					pipe->render_condition(pipe, predicate->query, mode);
1414				}
1415				else
1416				{
1417					/* TODO: add inverted predication to Gallium*/
1418					pipe->render_condition(pipe, 0, 0);
1419				}
1420			}
1421		}
1422	}
1423
1424	virtual void STDMETHODCALLTYPE SetPredication(
1425		ID3D11Predicate *new_predicate,
1426		BOOL new_predicate_value)
1427	{
1428		SYNCHRONIZED;
1429		if(render_predicate.p != new_predicate || render_predicate_value != new_predicate_value)
1430		{
1431			render_predicate = new_predicate;
1432			render_predicate_value = new_predicate_value;
1433			set_render_condition();
1434		}
1435	}
1436
1437	virtual void STDMETHODCALLTYPE GetPredication(
1438		ID3D11Predicate **out_predicate,
1439		BOOL *out_predicate_value)
1440	{
1441		SYNCHRONIZED;
1442		if(out_predicate)
1443			*out_predicate = render_predicate.ref();
1444		if(out_predicate_value)
1445			*out_predicate_value = render_predicate_value;
1446	}
1447
1448	static unsigned d3d11_subresource_to_level(struct pipe_resource* resource, unsigned subresource)
1449	{
1450		if(subresource <= resource->last_level)
1451		{
1452			return subresource;
1453		}
1454		else
1455		{
1456			unsigned levels = resource->last_level + 1;
1457			return subresource % levels;
1458		}
1459	}
1460
1461	static unsigned d3d11_subresource_to_layer(struct pipe_resource* resource, unsigned subresource)
1462	{
1463		if(subresource <= resource->last_level)
1464		{
1465			return 0;
1466		}
1467		else
1468		{
1469			unsigned levels = resource->last_level + 1;
1470			return subresource / levels;
1471		}
1472	}
1473
1474
1475	/* TODO: deferred contexts will need a different implementation of this,
1476	 * because we can't put the transfer info into the resource itself.
1477	 * Also, there are very different restrictions, for obvious reasons.
1478	 */
1479	virtual HRESULT STDMETHODCALLTYPE Map(
1480		ID3D11Resource *iresource,
1481		unsigned subresource,
1482		D3D11_MAP map_type,
1483		unsigned map_flags,
1484		D3D11_MAPPED_SUBRESOURCE *mapped_resource)
1485	{
1486		SYNCHRONIZED;
1487		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1488		if(resource->transfers.count(subresource))
1489			return E_FAIL;
1490		unsigned level = d3d11_subresource_to_level(resource->resource, subresource);
1491		unsigned layer = d3d11_subresource_to_layer(resource->resource, subresource);
1492		pipe_box box = d3d11_to_pipe_box(resource->resource, level, 0);
1493		box.z += layer;
1494		unsigned usage = 0;
1495		if(map_type == D3D11_MAP_READ)
1496			usage = PIPE_TRANSFER_READ;
1497		else if(map_type == D3D11_MAP_WRITE)
1498			usage = PIPE_TRANSFER_WRITE;
1499		else if(map_type == D3D11_MAP_READ_WRITE)
1500			usage = PIPE_TRANSFER_READ_WRITE;
1501		else if(map_type == D3D11_MAP_WRITE_DISCARD)
1502			usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD;
1503		else if(map_type == D3D11_MAP_WRITE_NO_OVERWRITE)
1504			usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_NOOVERWRITE;
1505		else
1506			return E_INVALIDARG;
1507		if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT)
1508			usage |= PIPE_TRANSFER_DONTBLOCK;
1509		struct pipe_transfer* transfer = pipe->get_transfer(pipe, resource->resource, level, usage, &box);
1510		if(!transfer) {
1511			if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT)
1512				return DXGI_ERROR_WAS_STILL_DRAWING;
1513			else
1514				return E_FAIL;
1515		}
1516		resource->transfers[subresource] = transfer;
1517		mapped_resource->pData = pipe->transfer_map(pipe, transfer);
1518		mapped_resource->RowPitch = transfer->stride;
1519		mapped_resource->DepthPitch = transfer->layer_stride;
1520		return S_OK;
1521	}
1522
1523	virtual void STDMETHODCALLTYPE Unmap(
1524		ID3D11Resource *iresource,
1525		unsigned subresource)
1526	{
1527		SYNCHRONIZED;
1528		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1529		std::unordered_map<unsigned, pipe_transfer*>::iterator i = resource->transfers.find(subresource);
1530		if(i != resource->transfers.end())
1531		{
1532			pipe->transfer_unmap(pipe, i->second);
1533			pipe->transfer_destroy(pipe, i->second);
1534			resource->transfers.erase(i);
1535		}
1536	}
1537
1538	virtual void STDMETHODCALLTYPE CopySubresourceRegion(
1539		ID3D11Resource *dst_resource,
1540		unsigned dst_subresource,
1541		unsigned dst_x,
1542		unsigned dst_y,
1543		unsigned dst_z,
1544		ID3D11Resource *src_resource,
1545		unsigned src_subresource,
1546		const D3D11_BOX *src_box)
1547	{
1548		SYNCHRONIZED;
1549		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1550		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1551		unsigned dst_level = d3d11_subresource_to_level(dst->resource, dst_subresource);
1552		unsigned dst_layer = d3d11_subresource_to_layer(dst->resource, dst_subresource);
1553		unsigned src_level = d3d11_subresource_to_level(src->resource, src_subresource);
1554		unsigned src_layer = d3d11_subresource_to_layer(src->resource, src_subresource);
1555		pipe_box box = d3d11_to_pipe_box(src->resource, src_level, src_box);
1556		dst_z += dst_layer;
1557		box.z += src_layer;
1558		{
1559			pipe->resource_copy_region(pipe,
1560				dst->resource, dst_level, dst_x, dst_y, dst_z,
1561				src->resource, src_level, &box);
1562		}
1563	}
1564
1565	virtual void STDMETHODCALLTYPE CopyResource(
1566		ID3D11Resource *dst_resource,
1567		ID3D11Resource *src_resource)
1568	{
1569		SYNCHRONIZED;
1570		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1571		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1572		unsigned level;
1573		for(level = 0; level <= dst->resource->last_level; ++level)
1574		{
1575			pipe_box box;
1576			box.x = box.y = box.z = 0;
1577			box.width = u_minify(dst->resource->width0, level);
1578			box.height = u_minify(dst->resource->height0, level);
1579			if(dst->resource->target == PIPE_TEXTURE_3D)
1580				box.depth = u_minify(dst->resource->depth0, level);
1581			else
1582				box.depth = dst->resource->array_size;
1583			pipe->resource_copy_region(pipe,
1584						   dst->resource, level, 0, 0, 0,
1585						   src->resource, level, &box);
1586		}
1587	}
1588
1589	virtual void STDMETHODCALLTYPE UpdateSubresource(
1590		ID3D11Resource *dst_resource,
1591		unsigned dst_subresource,
1592		const D3D11_BOX *pDstBox,
1593		const void *pSrcData,
1594		unsigned src_row_pitch,
1595		unsigned src_depth_pitch)
1596	{
1597		SYNCHRONIZED;
1598		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1599		unsigned dst_level = d3d11_subresource_to_level(dst->resource, dst_subresource);
1600		unsigned dst_layer = d3d11_subresource_to_layer(dst->resource, dst_subresource);
1601		pipe_box box = d3d11_to_pipe_box(dst->resource, dst_level, pDstBox);
1602		box.z += dst_layer;
1603		pipe->transfer_inline_write(pipe, dst->resource, dst_level, PIPE_TRANSFER_WRITE, &box, pSrcData, src_row_pitch, src_depth_pitch);
1604	}
1605
1606#if API >= 11
1607	virtual void STDMETHODCALLTYPE CopyStructureCount(
1608		ID3D11Buffer *dst_buffer,
1609		unsigned dst_aligned_byte_offset,
1610		ID3D11UnorderedAccessView *src_view)
1611	{
1612		SYNCHRONIZED;
1613	}
1614#endif
1615
1616	virtual void STDMETHODCALLTYPE ClearRenderTargetView(
1617		ID3D11RenderTargetView *render_target_view,
1618		const float color[4])
1619	{
1620		SYNCHRONIZED;
1621		GalliumD3D11RenderTargetView* view = ((GalliumD3D11RenderTargetView*)render_target_view);
1622		union pipe_color_union cc;
1623		cc.f[0] = color[0];
1624		cc.f[1] = color[1];
1625		cc.f[2] = color[2];
1626		cc.f[3] = color[3];
1627		pipe->clear_render_target(pipe, view->object, &cc, 0, 0, view->object->width, view->object->height);
1628	}
1629
1630	virtual void STDMETHODCALLTYPE ClearDepthStencilView(
1631		ID3D11DepthStencilView  *depth_stencil_view,
1632		unsigned clear_flags,
1633		float depth,
1634		UINT8 stencil)
1635	{
1636		SYNCHRONIZED;
1637		GalliumD3D11DepthStencilView* view = ((GalliumD3D11DepthStencilView*)depth_stencil_view);
1638		unsigned flags = 0;
1639		if(clear_flags & D3D11_CLEAR_DEPTH)
1640			flags |= PIPE_CLEAR_DEPTH;
1641		if(clear_flags & D3D11_CLEAR_STENCIL)
1642			flags |= PIPE_CLEAR_STENCIL;
1643		pipe->clear_depth_stencil(pipe, view->object, flags, depth, stencil, 0, 0, view->object->width, view->object->height);
1644	}
1645
1646#if API >= 11
1647	virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewUint(
1648		ID3D11UnorderedAccessView *unordered_access_view,
1649		const unsigned values[4])
1650	{
1651		SYNCHRONIZED;
1652	}
1653
1654	virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewFloat(
1655			ID3D11UnorderedAccessView *unordered_access_view,
1656			const float values[4])
1657	{
1658		SYNCHRONIZED;
1659	}
1660#endif
1661
1662	void restore_gallium_state_blit_only()
1663	{
1664		pipe->bind_blend_state(pipe, blend_state.p ? blend_state.p->object : default_blend);
1665		pipe->bind_depth_stencil_alpha_state(pipe, depth_stencil_state.p ? depth_stencil_state.p->object : default_depth_stencil);
1666		pipe->bind_rasterizer_state(pipe, rasterizer_state.p ? rasterizer_state.p->object : default_rasterizer);
1667		pipe->bind_vertex_elements_state(pipe, input_layout.p ? input_layout.p->object : default_input_layout);
1668		pipe->bind_fs_state(pipe, shaders[D3D11_STAGE_PS].p ? shaders[D3D11_STAGE_PS].p->object : default_shaders[PIPE_SHADER_FRAGMENT]);
1669		pipe->bind_vs_state(pipe, shaders[D3D11_STAGE_VS].p ? shaders[D3D11_STAGE_VS].p->object : default_shaders[PIPE_SHADER_VERTEX]);
1670		if(caps.gs)
1671			pipe->bind_gs_state(pipe, shaders[D3D11_STAGE_GS].p ? shaders[D3D11_STAGE_GS].p->object : default_shaders[PIPE_SHADER_GEOMETRY]);
1672		if(caps.so && num_so_targets)
1673			pipe->set_stream_output_targets(pipe, num_so_targets, so_targets, ~0);
1674		set_framebuffer();
1675		set_viewport();
1676		set_render_condition();
1677
1678		update_flags |= UPDATE_VERTEX_BUFFERS | (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_PS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_PS));
1679	}
1680
1681	virtual void STDMETHODCALLTYPE RestoreGalliumStateBlitOnly()
1682	{
1683		SYNCHRONIZED;
1684		restore_gallium_state_blit_only();
1685	}
1686
1687	virtual void STDMETHODCALLTYPE GenerateMips(
1688		ID3D11ShaderResourceView *shader_resource_view)
1689	{
1690		SYNCHRONIZED;
1691
1692		GalliumD3D11ShaderResourceView* view = (GalliumD3D11ShaderResourceView*)shader_resource_view;
1693		if(caps.gs)
1694			pipe->bind_gs_state(pipe, 0);
1695		if(caps.so && num_so_targets)
1696			pipe->set_stream_output_targets(pipe, 0, NULL, 0);
1697		if(pipe->render_condition)
1698			pipe->render_condition(pipe, 0, 0);
1699		for(unsigned layer = view->object->u.tex.first_layer; layer <= view->object->u.tex.last_layer; ++layer)
1700			util_gen_mipmap(gen_mipmap, view->object, layer, view->object->u.tex.first_level, view->object->u.tex.last_level, PIPE_TEX_FILTER_LINEAR);
1701		restore_gallium_state_blit_only();
1702	}
1703
1704	virtual void STDMETHODCALLTYPE RestoreGalliumState()
1705	{
1706		SYNCHRONIZED;
1707		restore_gallium_state_blit_only();
1708
1709		set_index_buffer();
1710		set_stencil_ref();
1711		pipe->set_blend_color(pipe, (struct pipe_blend_color*)blend_color);
1712		pipe->set_sample_mask(pipe, sample_mask);
1713
1714		for(unsigned s = 0; s < 3; ++s)
1715		{
1716			unsigned num = std::min(caps.constant_buffers[s], (unsigned)D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
1717			for(unsigned i = 0; i < num; ++i)
1718				pipe->set_constant_buffer(pipe, s, i, constant_buffers[s][i].p ? constant_buffers[s][i].p->resource : 0);
1719		}
1720
1721		update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_VS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_VS));
1722		update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_GS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_GS));
1723
1724		set_scissor();
1725	}
1726
1727#if API >= 11
1728	/* TODO: hack SRVs or sampler states to handle this, or add to Gallium */
1729	virtual void STDMETHODCALLTYPE SetResourceMinLOD(
1730		ID3D11Resource *iresource,
1731		float min_lod)
1732	{
1733		SYNCHRONIZED;
1734		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1735		if(resource->min_lod != min_lod)
1736		{
1737			// TODO: actually do anything?
1738			resource->min_lod = min_lod;
1739		}
1740	}
1741
1742	virtual float STDMETHODCALLTYPE GetResourceMinLOD(
1743		ID3D11Resource *iresource)
1744	{
1745		SYNCHRONIZED;
1746		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1747		return resource->min_lod;
1748	}
1749#endif
1750
1751	virtual void STDMETHODCALLTYPE ResolveSubresource(
1752		ID3D11Resource *dst_resource,
1753		unsigned dst_subresource,
1754		ID3D11Resource *src_resource,
1755		unsigned src_subresource,
1756		DXGI_FORMAT format)
1757	{
1758		SYNCHRONIZED;
1759		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1760		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1761		struct pipe_resolve_info info;
1762
1763		info.dst.res = dst->resource;
1764		info.src.res = src->resource;
1765		info.dst.level = 0;
1766		info.dst.layer = d3d11_subresource_to_layer(dst->resource, dst_subresource);
1767		info.src.layer = d3d11_subresource_to_layer(src->resource, src_subresource);
1768
1769		info.src.x0 = 0;
1770		info.src.x1 = info.src.res->width0;
1771		info.src.y0 = 0;
1772		info.src.y1 = info.src.res->height0;
1773		info.dst.x0 = 0;
1774		info.dst.x1 = info.dst.res->width0;
1775		info.dst.y0 = 0;
1776		info.dst.y1 = info.dst.res->height0;
1777
1778		info.mask = PIPE_MASK_RGBA | PIPE_MASK_ZS;
1779
1780		pipe->resource_resolve(pipe, &info);
1781	}
1782
1783#if API >= 11
1784	virtual void STDMETHODCALLTYPE ExecuteCommandList(
1785		ID3D11CommandList *command_list,
1786		BOOL restore_context_state)
1787	{
1788		SYNCHRONIZED;
1789	}
1790
1791	virtual HRESULT STDMETHODCALLTYPE FinishCommandList(
1792		BOOL restore_deferred_context_state,
1793		ID3D11CommandList **out_command_list)
1794	{
1795		SYNCHRONIZED;
1796		return E_NOTIMPL;
1797	}
1798#endif
1799
1800	virtual void STDMETHODCALLTYPE ClearState(void)
1801	{
1802		/* we don't take a lock here because we would deadlock otherwise
1803		 * TODO: this is probably incorrect, because ClearState should likely be atomic.
1804		 * However, I can't think of any correct usage that would be affected by this
1805		 * being non-atomic, and making this atomic is quite expensive and complicates
1806		 * the code
1807		 */
1808
1809		// we qualify all calls so that we avoid virtual dispatch and might get them inlined
1810		// TODO: make sure all this gets inlined, which might require more compiler flags
1811		// TODO: optimize this
1812#if API >= 11
1813		GalliumD3D11DeviceContext::PSSetShader(0, 0, 0);
1814		GalliumD3D11DeviceContext::GSSetShader(0, 0, 0);
1815		GalliumD3D11DeviceContext::VSSetShader(0, 0, 0);
1816		GalliumD3D11DeviceContext::HSSetShader(0, 0, 0);
1817		GalliumD3D11DeviceContext::DSSetShader(0, 0, 0);
1818		GalliumD3D11DeviceContext::CSSetShader(0, 0, 0);
1819#else
1820		GalliumD3D11DeviceContext::PSSetShader(0);
1821		GalliumD3D11DeviceContext::GSSetShader(0);
1822		GalliumD3D11DeviceContext::VSSetShader(0);
1823#endif
1824
1825		GalliumD3D11DeviceContext::IASetInputLayout(0);
1826		GalliumD3D11DeviceContext::IASetIndexBuffer(0, DXGI_FORMAT_UNKNOWN, 0);
1827		GalliumD3D11DeviceContext::RSSetState(0);
1828		GalliumD3D11DeviceContext::OMSetDepthStencilState(0, 0);
1829		GalliumD3D11DeviceContext::OMSetBlendState(0, (float*)zero_data, ~0);
1830		GalliumD3D11DeviceContext::SetPredication(0, 0);
1831		GalliumD3D11DeviceContext::IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_UNDEFINED);
1832
1833		GalliumD3D11DeviceContext::PSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1834		GalliumD3D11DeviceContext::GSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1835		GalliumD3D11DeviceContext::VSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1836#if API >= 11
1837		GalliumD3D11DeviceContext::HSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1838		GalliumD3D11DeviceContext::DSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1839		GalliumD3D11DeviceContext::CSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1840#endif
1841
1842		GalliumD3D11DeviceContext::IASetVertexBuffers(0, num_vertex_buffers, (ID3D11Buffer**)zero_data, (unsigned*)zero_data, (unsigned*)zero_data);
1843#if API >= 11
1844		GalliumD3D11DeviceContext::OMSetRenderTargetsAndUnorderedAccessViews(0, 0, 0 , 0, 0, 0, 0);
1845#else
1846		GalliumD3D11DeviceContext::OMSetRenderTargets(0, 0, 0 );
1847#endif
1848		GalliumD3D11DeviceContext::SOSetTargets(0, 0, 0);
1849
1850		GalliumD3D11DeviceContext::PSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11ShaderResourceView**)zero_data);
1851		GalliumD3D11DeviceContext::GSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11ShaderResourceView**)zero_data);
1852		GalliumD3D11DeviceContext::VSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11ShaderResourceView**)zero_data);
1853#if API >= 11
1854		GalliumD3D11DeviceContext::HSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11ShaderResourceView**)zero_data);
1855		GalliumD3D11DeviceContext::DSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11ShaderResourceView**)zero_data);
1856		GalliumD3D11DeviceContext::CSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11ShaderResourceView**)zero_data);
1857#endif
1858
1859		GalliumD3D11DeviceContext::PSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11SamplerState**)zero_data);
1860		GalliumD3D11DeviceContext::GSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11SamplerState**)zero_data);
1861		GalliumD3D11DeviceContext::VSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11SamplerState**)zero_data);
1862#if API >= 11
1863		GalliumD3D11DeviceContext::HSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11SamplerState**)zero_data);
1864		GalliumD3D11DeviceContext::DSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11SamplerState**)zero_data);
1865		GalliumD3D11DeviceContext::CSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11SamplerState**)zero_data);
1866#endif
1867
1868		GalliumD3D11DeviceContext::RSSetViewports(0, 0);
1869		GalliumD3D11DeviceContext::RSSetScissorRects(0, 0);
1870	}
1871
1872	virtual void STDMETHODCALLTYPE Flush(void)
1873	{
1874		SYNCHRONIZED;
1875                pipe->flush(pipe, 0);
1876	}
1877
1878	/* In Direct3D 10, if the reference count of an object drops to 0, it is automatically
1879	 * cleanly unbound from the pipeline.
1880	 * In Direct3D 11, the pipeline holds a reference.
1881	 *
1882	 * Note that instead of always scanning the pipeline on destruction, we could
1883	 * maintain the internal reference count on DirectX 10 and use it to check if an
1884	 * object is still bound.
1885	 * Presumably, on average, scanning is faster if the application is well written.
1886	 */
1887#if API < 11
1888#define IMPLEMENT_SIMPLE_UNBIND(name, member, gallium, def) \
1889	void Unbind##name(ID3D11##name* state) \
1890	{ \
1891		SYNCHRONIZED; \
1892		if((void*)state == (void*)member.p) \
1893		{ \
1894			member.p = 0; \
1895			pipe->bind_##gallium##_state(pipe, default_##def); \
1896		} \
1897	}
1898	IMPLEMENT_SIMPLE_UNBIND(BlendState, blend_state, blend, blend)
1899	IMPLEMENT_SIMPLE_UNBIND(RasterizerState, rasterizer_state, rasterizer, rasterizer)
1900	IMPLEMENT_SIMPLE_UNBIND(DepthStencilState, depth_stencil_state, depth_stencil_alpha, depth_stencil)
1901	IMPLEMENT_SIMPLE_UNBIND(InputLayout, input_layout, vertex_elements, input_layout)
1902	IMPLEMENT_SIMPLE_UNBIND(PixelShader, shaders[D3D11_STAGE_PS], fs, shaders[D3D11_STAGE_PS])
1903	IMPLEMENT_SIMPLE_UNBIND(VertexShader, shaders[D3D11_STAGE_VS], vs, shaders[D3D11_STAGE_VS])
1904	IMPLEMENT_SIMPLE_UNBIND(GeometryShader, shaders[D3D11_STAGE_GS], gs, shaders[D3D11_STAGE_GS])
1905
1906	void UnbindPredicate(ID3D11Predicate* predicate)
1907	{
1908		SYNCHRONIZED;
1909		if(predicate == render_predicate)
1910		{
1911			render_predicate.p = NULL;
1912			render_predicate_value = 0;
1913			pipe->render_condition(pipe, 0, 0);
1914		}
1915	}
1916
1917	void UnbindSamplerState(ID3D11SamplerState* state)
1918	{
1919		SYNCHRONIZED;
1920		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1921		{
1922			for(unsigned i = 0; i < num_samplers[s]; ++i)
1923			{
1924				if(samplers[s][i] == state)
1925				{
1926					samplers[s][i].p = NULL;
1927					sampler_csos[s][i] = NULL;
1928					update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s));
1929				}
1930			}
1931		}
1932	}
1933
1934	void UnbindBuffer(ID3D11Buffer* buffer)
1935	{
1936		SYNCHRONIZED;
1937		if(buffer == index_buffer)
1938		{
1939			index_buffer.p = 0;
1940			index_format = DXGI_FORMAT_UNKNOWN;
1941			index_offset = 0;
1942			struct pipe_index_buffer ib;
1943			memset(&ib, 0, sizeof(ib));
1944			pipe->set_index_buffer(pipe, &ib);
1945		}
1946
1947		for(unsigned i = 0; i < num_vertex_buffers; ++i)
1948		{
1949			if(buffer == input_buffers[i])
1950			{
1951				input_buffers[i].p = 0;
1952				memset(&vertex_buffers[num_vertex_buffers], 0, sizeof(vertex_buffers[num_vertex_buffers]));
1953				update_flags |= UPDATE_VERTEX_BUFFERS;
1954			}
1955		}
1956
1957		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1958		{
1959			for(unsigned i = 0; i < sizeof(constant_buffers) / sizeof(constant_buffers[0]); ++i)
1960			{
1961				if(constant_buffers[s][i] == buffer)
1962				{
1963					constant_buffers[s][i] = (ID3D10Buffer*)NULL;
1964					pipe->set_constant_buffer(pipe, s, i, NULL);
1965				}
1966			}
1967		}
1968	}
1969
1970	void UnbindDepthStencilView(ID3D11DepthStencilView * view)
1971	{
1972		SYNCHRONIZED;
1973		if(view == depth_stencil_view)
1974		{
1975			depth_stencil_view.p = NULL;
1976			set_framebuffer();
1977		}
1978	}
1979
1980	void UnbindRenderTargetView(ID3D11RenderTargetView* view)
1981	{
1982		SYNCHRONIZED;
1983		bool any_bound = false;
1984		for(unsigned i = 0; i < num_render_target_views; ++i)
1985		{
1986			if(render_target_views[i] == view)
1987			{
1988				render_target_views[i].p = NULL;
1989				any_bound = true;
1990			}
1991		}
1992		if(any_bound)
1993			set_framebuffer();
1994	}
1995
1996	void UnbindShaderResourceView(ID3D11ShaderResourceView* view)
1997	{
1998		SYNCHRONIZED;
1999		for(unsigned s = 0; s < D3D11_STAGES; ++s)
2000		{
2001			for(unsigned i = 0; i < num_shader_resource_views[s]; ++i)
2002			{
2003				if(shader_resource_views[s][i] == view)
2004				{
2005					shader_resource_views[s][i].p = NULL;
2006					sampler_views[s][i] = NULL;
2007					update_flags |= (1 << (UPDATE_VIEWS_SHIFT + s));
2008				}
2009			}
2010		}
2011	}
2012#endif
2013
2014#undef SYNCHRONIZED
2015};
2016
2017#if API >= 11
2018/* This approach serves two purposes.
2019 * First, we don't want to do an atomic operation to manipulate the reference
2020 * count every time something is bound/unbound to the pipeline, since they are
2021 * expensive.
2022 * Fortunately, the immediate context can only be used by a single thread, so
2023 * we don't have to use them, as long as a separate reference count is used
2024 * (see dual_refcnt_t).
2025 *
2026 * Second, we want to avoid the Device -> DeviceContext -> bound DeviceChild -> Device
2027 * garbage cycle.
2028 * To avoid it, DeviceChild doesn't hold a reference to Device as usual, but adds
2029 * one for each external reference count, while internal nonatomic_add_ref doesn't
2030 * add any.
2031 *
2032 * Note that ideally we would like to eliminate the non-atomic op too, but this is more
2033 * complicated, since we would either need to use garbage collection and give up
2034 * deterministic destruction (especially bad for large textures), or scan the whole
2035 * pipeline state every time the reference count of object drops to 0, which risks
2036 * pathological slowdowns.
2037 *
2038 * Since this microoptimization should matter relatively little, let's avoid it for now.
2039 *
2040 * Note that deferred contexts don't use this, since as a whole, they must be thread-safe.
2041 * Eliminating the atomic ops for deferred contexts seems substantially harder.
2042 * This might be a problem if they are used in a one-shot multithreaded rendering
2043 * fashion, where SMP cacheline bouncing on the reference count may be visible.
2044 *
2045 * The idea would be to attach a structure of reference counts indexed by deferred
2046 * context id to each object. Ideally, this should be organized like ext2 block pointers.
2047 *
2048 * Every deferred context would get a reference count in its own cacheline.
2049 * The external count is protected by a lock bit, and there is also a "lock bit" in each
2050 * internal count.
2051 *
2052 * When the external count has to be dropped to 0, the lock bit is taken and all internal
2053 * reference counts are scanned, taking a count of them. A flag would also be set on them.
2054 * Deferred context manipulation would notice the flag, and update the count.
2055 * Once the count goes to zero, the object is freed.
2056 *
2057 * The problem of this is that if the external reference count ping-pongs between
2058 * zero and non-zero, the scans will take a lot of time.
2059 *
2060 * The idea to solve this is to compute the scans in a binary-tree like fashion, where
2061 * each binary tree node would have a "determined bit", which would be invalidated
2062 * by manipulations.
2063 *
2064 * However, all this complexity might actually be a loss in most cases, so let's just
2065 * stick to a single atomic refcnt for now.
2066 *
2067 * Also, we don't even support deferred contexts yet, so this can wait.
2068 */
2069struct nonatomic_device_child_ptr_traits
2070{
2071	static void add_ref(void* p)
2072	{
2073		if(p)
2074			((GalliumD3D11DeviceChild<>*)p)->nonatomic_add_ref();
2075	}
2076
2077	static void release(void* p)
2078	{
2079		if(p)
2080			((GalliumD3D11DeviceChild<>*)p)->nonatomic_release();
2081	}
2082};
2083
2084struct GalliumD3D11ImmediateDeviceContext
2085	: public GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>
2086{
2087	GalliumD3D11ImmediateDeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, unsigned context_flags = 0)
2088	: GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>(device, pipe, context_flags)
2089	{
2090		// not necessary, but tests that the API at least basically works
2091		ClearState();
2092	}
2093
2094	/* we do this since otherwise we would have a garbage cycle between this and the device */
2095	virtual ULONG STDMETHODCALLTYPE AddRef()
2096	{
2097		return this->device->AddRef();
2098	}
2099
2100	virtual ULONG STDMETHODCALLTYPE Release()
2101	{
2102		return this->device->Release();
2103	}
2104
2105	virtual D3D11_DEVICE_CONTEXT_TYPE STDMETHODCALLTYPE GetType()
2106	{
2107		return D3D11_DEVICE_CONTEXT_IMMEDIATE;
2108	}
2109};
2110
2111static ID3D11DeviceContext* GalliumD3D11ImmediateDeviceContext_Create(GalliumD3D11Screen* device, struct pipe_context* pipe, bool owns_pipe)
2112{
2113	return new GalliumD3D11ImmediateDeviceContext(device, pipe, owns_pipe);
2114}
2115
2116static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumState(ID3D11DeviceContext* context)
2117{
2118	((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumState();
2119}
2120
2121static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumStateBlitOnly(ID3D11DeviceContext* context)
2122{
2123	((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumStateBlitOnly();
2124}
2125
2126static void GalliumD3D11ImmediateDeviceContext_Destroy(ID3D11DeviceContext* context)
2127{
2128	delete (GalliumD3D11ImmediateDeviceContext*)context;
2129}
2130#endif
2131