d3d11_context.h revision 55592d9da1bb694c7275984cf9a3ecaafcccf46a
1/**************************************************************************
2 *
3 * Copyright 2010 Luca Barbieri
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
/* used to unbind things, we need 128 due to resources */
/* Static storage, hence zero-initialized: an array of 128 null pointers. */
static const void* zero_data[128];

/*
 * Bit layout of the context's update_flags word (see its uses in
 * xs_set_shader, xs_set_shader_resources, xs_set_samplers and update_state):
 *   bit (UPDATE_VIEWS_SHIFT + stage)    - sampler views of that stage are dirty
 *   bit (UPDATE_SAMPLERS_SHIFT + stage) - sampler states of that stage are dirty
 *   UPDATE_VERTEX_BUFFERS               - vertex buffer bindings are dirty
 */
#define UPDATE_VIEWS_SHIFT (D3D11_STAGES * 0)
#define UPDATE_SAMPLERS_SHIFT (D3D11_STAGES * 1)
#define UPDATE_VERTEX_BUFFERS (1 << (D3D11_STAGES * 2))
33
34#if API >= 11
35template<typename PtrTraits>
36struct GalliumD3D11DeviceContext :
37	public GalliumD3D11DeviceChild<ID3D11DeviceContext>
38{
39#else
40template<bool threadsafe>
41struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe>
42{
43	typedef simple_ptr_traits PtrTraits;
44	typedef GalliumD3D10Device GalliumD3D10DeviceContext;
45#endif
46
	// Single-object bindings requested through the D3D API, held via refcnt_ptr.
	refcnt_ptr<GalliumD3D11Shader<>, PtrTraits> shaders[D3D11_STAGES];
	refcnt_ptr<GalliumD3D11InputLayout, PtrTraits> input_layout;
	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> index_buffer;
	refcnt_ptr<GalliumD3D11RasterizerState, PtrTraits> rasterizer_state;
	refcnt_ptr<GalliumD3D11DepthStencilState, PtrTraits> depth_stencil_state;
	refcnt_ptr<GalliumD3D11BlendState, PtrTraits> blend_state;
	refcnt_ptr<GalliumD3D11DepthStencilView, PtrTraits> depth_stencil_view;
	refcnt_ptr<GalliumD3D11Predicate, PtrTraits> render_predicate;

	// Slot-array bindings; the per-stage ones are indexed [stage][slot].
	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> constant_buffers[D3D11_STAGES][D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT];
	refcnt_ptr<GalliumD3D11ShaderResourceView, PtrTraits> shader_resource_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
	refcnt_ptr<GalliumD3D11SamplerState, PtrTraits> samplers[D3D11_STAGES][D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> input_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
	refcnt_ptr<GalliumD3D11RenderTargetView, PtrTraits> render_target_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
	refcnt_ptr<GalliumD3D11Buffer, PtrTraits> so_targets[D3D11_SO_BUFFER_SLOT_COUNT];

#if API >= 11
	// Unordered access views are only tracked when building the D3D11 API.
	refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> cs_unordered_access_views[D3D11_PS_CS_UAV_REGISTER_COUNT];
	refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> om_unordered_access_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
#endif

	// Plain-value pipeline state; reset to defaults in init_context().
	D3D11_VIEWPORT viewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
	D3D11_RECT scissor_rects[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
	unsigned so_offsets[D3D11_SO_BUFFER_SLOT_COUNT];
	D3D11_PRIMITIVE_TOPOLOGY primitive_topology;
	DXGI_FORMAT index_format;
	unsigned index_offset;
	BOOL render_predicate_value;
	float blend_color[4];
	unsigned sample_mask;
	unsigned stencil_ref;
	bool depth_clamp;

	// Driver state objects created in init_context() and bound whenever the
	// application binds a null object of the corresponding kind.
	void* default_input_layout;
	void* default_rasterizer;
	void* default_depth_stencil;
	void* default_blend;
	void* default_sampler;
	void* ld_sampler;
	void * default_shaders[D3D11_STAGES];

	// derived state
	// (Gallium-side mirrors of the bindings above, in the form handed to pipe)
	int primitive_mode;
	struct pipe_vertex_buffer vertex_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
	struct pipe_resource* so_buffers[D3D11_SO_BUFFER_SLOT_COUNT];
	struct pipe_sampler_view* sampler_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
	struct
	{
		// ld must stay immediately before v: update_stage() reaches it as v[-1].
		void* ld; // accessed with a -1 index from v
		void* v[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
	} sampler_csos[D3D11_STAGES];
	// NOTE(review): no use of `buffers` is visible in this part of the file.
	struct pipe_resource * buffers[D3D11_SO_BUFFER_SLOT_COUNT];
	// High-water marks: one past the highest slot that may currently be in use.
	unsigned num_shader_resource_views[D3D11_STAGES];
	unsigned num_samplers[D3D11_STAGES];
	unsigned num_vertex_buffers;
	unsigned num_render_target_views;
	unsigned num_viewports;
	unsigned num_scissor_rects;
	unsigned num_so_targets;

	struct pipe_context* pipe;
	// Dirty bits; see the UPDATE_* macros. Flushed to the driver by update_state().
	unsigned update_flags;

	// When true, destroy_context() also destroys `pipe`.
	bool owns_pipe;
	// Value returned by GetContextFlags().
	unsigned context_flags;

	// Copied from the screen in the constructor, then narrowed by init_context().
	GalliumD3D11Caps caps;

	cso_context* cso_ctx;
	gen_mipmap_state* gen_mipmap;
117
118#if API >= 11
119#define SYNCHRONIZED do {} while(0)
120
	/*
	 * D3D11 build: creates a device context on top of an existing pipe_context.
	 *
	 * device        - owning screen; its screen_caps are copied into `caps`
	 * pipe          - Gallium context that all state is forwarded to
	 * owns_pipe     - when true, `pipe` is destroyed together with this object
	 * context_flags - value later reported by GetContextFlags()
	 */
	GalliumD3D11DeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, bool owns_pipe, unsigned context_flags = 0)
	: GalliumD3D11DeviceChild<ID3D11DeviceContext>(device), pipe(pipe), owns_pipe(owns_pipe), context_flags(context_flags)
	{
		// caps must be set before init_context(), which narrows it based on
		// which hooks the pipe_context provides.
		caps = device->screen_caps;
		init_context();
	}
127
	/* Releases the default state objects and, if owned, the pipe_context. */
	~GalliumD3D11DeviceContext()
	{
		destroy_context();
	}
132#else
133#define SYNCHRONIZED lock_t<maybe_mutex_t<threadsafe> > lock_(this->mutex)
134
	/*
	 * D3D10 build: the device doubles as the context. All arguments are
	 * forwarded to the GalliumD3D10ScreenImpl base; `pipe` and `owns_pipe` are
	 * also kept locally, and context_flags is always 0.
	 */
	GalliumD3D10Device(pipe_screen* screen, pipe_context* pipe, bool owns_pipe, unsigned creation_flags, IDXGIAdapter* adapter)
	: GalliumD3D10ScreenImpl<threadsafe>(screen, pipe, owns_pipe, creation_flags, adapter), pipe(pipe), owns_pipe(owns_pipe), context_flags(0)
	{
		// caps must be set before init_context(), which narrows it based on
		// which hooks the pipe_context provides.
		caps = this->screen_caps;
		init_context();
	}
141
	/* Releases the default state objects and, if owned, the pipe_context. */
	~GalliumD3D10Device()
	{
		destroy_context();
	}
146#endif
147
	/*
	 * One-time setup shared by both constructors.
	 *
	 * Narrows `caps` to what the pipe_context actually implements, resets all
	 * tracked pipeline state to its initial values, creates the default driver
	 * state objects (input layout, rasterizer, depth/stencil, blend, samplers,
	 * empty vertex/fragment shaders) plus the cso/mipmap helpers, and finally
	 * calls RestoreGalliumState() (defined outside this part of the file).
	 *
	 * Expects `pipe` and `caps` to be set already.
	 */
	void init_context()
	{
		// Drop capabilities for which the driver provides no hook.
		if(!pipe->begin_query)
			caps.queries = false;
		if(!pipe->render_condition)
			caps.render_condition = false;
		if(!pipe->bind_gs_state)
		{
			caps.gs = false;
			// presumably leaves only the vertex and fragment stages - TODO confirm
			caps.stages = 2;
		}
		if(!pipe->set_stream_output_buffers)
			caps.so = false;
		if(!pipe->set_geometry_sampler_views)
			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_GEOMETRY);
		if(!pipe->set_fragment_sampler_views)
			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_FRAGMENT);
		if(!pipe->set_vertex_sampler_views)
			caps.stages_with_sampling &=~ (1 << PIPE_SHADER_VERTEX);

		update_flags = 0;

		// pipeline state
		memset(viewports, 0, sizeof(viewports));
		memset(scissor_rects, 0, sizeof(scissor_rects));
		memset(so_offsets, 0, sizeof(so_offsets));
		primitive_topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
		index_format = DXGI_FORMAT_UNKNOWN;
		index_offset = 0;
		render_predicate_value = 0;
		memset(blend_color, 0, sizeof(blend_color));
		sample_mask = ~0;
		stencil_ref = 0;
		depth_clamp = 0;

		// derived state
		primitive_mode = 0;
		memset(vertex_buffers, 0, sizeof(vertex_buffers));
		memset(so_buffers, 0, sizeof(so_buffers));
		memset(sampler_views, 0, sizeof(sampler_views));
		memset(sampler_csos, 0, sizeof(sampler_csos));
		memset(num_shader_resource_views, 0, sizeof(num_shader_resource_views));
		memset(num_samplers, 0, sizeof(num_samplers));
		num_vertex_buffers = 0;
		num_render_target_views = 0;
		num_viewports = 0;
		num_scissor_rects = 0;
		num_so_targets = 0;

		// Default input layout: a vertex-elements state with zero elements.
		default_input_layout = pipe->create_vertex_elements_state(pipe, 0, 0);

		// Default rasterizer: everything zero except back-face culling and
		// the gl_rasterization_rules flag.
		struct pipe_rasterizer_state rasterizerd;
		memset(&rasterizerd, 0, sizeof(rasterizerd));
		rasterizerd.gl_rasterization_rules = 1;
		rasterizerd.cull_face = PIPE_FACE_BACK;
		default_rasterizer = pipe->create_rasterizer_state(pipe, &rasterizerd);

		// Default depth/stencil: depth test LESS with depth writes enabled.
		struct pipe_depth_stencil_alpha_state depth_stencild;
		memset(&depth_stencild, 0, sizeof(depth_stencild));
		depth_stencild.depth.enabled = TRUE;
		depth_stencild.depth.writemask = 1;
		depth_stencild.depth.func = PIPE_FUNC_LESS;
		default_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &depth_stencild);

		// Default blend: all four colour channels writable on rt[0], rest zero.
		struct pipe_blend_state blendd;
		memset(&blendd, 0, sizeof(blendd));
		blendd.rt[0].colormask = 0xf;
		default_blend = pipe->create_blend_state(pipe, &blendd);

		// Default sampler: linear filtering, clamp-to-edge, white border,
		// unrestricted LOD range.
		struct pipe_sampler_state samplerd;
		memset(&samplerd, 0, sizeof(samplerd));
		samplerd.normalized_coords = 1;
		samplerd.min_img_filter = PIPE_TEX_FILTER_LINEAR;
		samplerd.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
		samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR;
		samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
		samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
		samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
		samplerd.border_color[0] = 1.0f;
		samplerd.border_color[1] = 1.0f;
		samplerd.border_color[2] = 1.0f;
		samplerd.border_color[3] = 1.0f;
		samplerd.min_lod = -FLT_MAX;
		samplerd.max_lod = FLT_MAX;
		samplerd.max_anisotropy = 1;
		default_sampler = pipe->create_sampler_state(pipe, &samplerd);

		// "ld" sampler: unnormalized coordinates, nearest filtering, no
		// mipmapping, clamp-to-border (border colour left at zero).
		memset(&samplerd, 0, sizeof(samplerd));
		samplerd.normalized_coords = 0;
		samplerd.min_img_filter = PIPE_TEX_FILTER_NEAREST;
		samplerd.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
		samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
		samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
		samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
		samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
		samplerd.min_lod = -FLT_MAX;
		samplerd.max_lod = FLT_MAX;
		samplerd.max_anisotropy = 1;
		ld_sampler = pipe->create_sampler_state(pipe, &samplerd);

		// Every sampler slot of every stage starts out as the default sampler;
		// the extra `ld` entry holds the ld sampler.
		for(unsigned s = 0; s < D3D11_STAGES; ++s)
		{
			sampler_csos[s].ld = ld_sampler;
			for(unsigned i = 0; i < D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; ++i)
				sampler_csos[s].v[i] = default_sampler;
		}

		// TODO: should this really be empty shaders, or should they be all-passthrough?
		// Only fragment and vertex defaults are created; other stages stay NULL.
		memset(default_shaders, 0, sizeof(default_shaders));
		struct ureg_program *ureg;
		ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
		ureg_END(ureg);
		default_shaders[PIPE_SHADER_FRAGMENT] = ureg_create_shader_and_destroy(ureg, pipe);

		ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
		ureg_END(ureg);
		default_shaders[PIPE_SHADER_VERTEX] = ureg_create_shader_and_destroy(ureg, pipe);

		cso_ctx = cso_create_context(pipe);
		gen_mipmap = util_create_gen_mipmap(pipe, cso_ctx);

		RestoreGalliumState();
	}
271
	/*
	 * Tears down everything init_context() created.
	 *
	 * The helper objects go first, then each kind of default state object is
	 * unbound from the pipe (by binding 0) before it is deleted, and finally
	 * the pipe_context itself is destroyed if this object owns it.
	 */
	void destroy_context()
	{
		util_destroy_gen_mipmap(gen_mipmap);
		cso_destroy_context(cso_ctx);

		pipe->bind_vertex_elements_state(pipe, 0);
		pipe->delete_vertex_elements_state(pipe, default_input_layout);

		pipe->bind_rasterizer_state(pipe, 0);
		pipe->delete_rasterizer_state(pipe, default_rasterizer);

		pipe->bind_depth_stencil_alpha_state(pipe, 0);
		pipe->delete_depth_stencil_alpha_state(pipe, default_depth_stencil);

		pipe->bind_blend_state(pipe, 0);
		pipe->delete_blend_state(pipe, default_blend);

		// Unbind samplers from every stage before deleting the two sampler
		// objects; the geometry hook is optional and therefore checked.
		pipe->bind_fragment_sampler_states(pipe, 0, 0);
		pipe->bind_vertex_sampler_states(pipe, 0, 0);
		if(pipe->bind_geometry_sampler_states)
			pipe->bind_geometry_sampler_states(pipe, 0, 0);
		pipe->delete_sampler_state(pipe, default_sampler);
		pipe->delete_sampler_state(pipe, ld_sampler);

		pipe->bind_fs_state(pipe, 0);
		pipe->delete_fs_state(pipe, default_shaders[PIPE_SHADER_FRAGMENT]);

		pipe->bind_vs_state(pipe, 0);
		pipe->delete_vs_state(pipe, default_shaders[PIPE_SHADER_VERTEX]);

		// Must be last: every call above goes through `pipe`.
		if(owns_pipe)
			pipe->destroy(pipe);
	}
305
306	virtual unsigned STDMETHODCALLTYPE GetContextFlags(void)
307	{
308		return context_flags;
309	}
/*
 * Extra trailing parameters of XS##SetShader / XS##GetShader as generated by
 * IMPLEMENT_SHADER_STAGE. The D3D11 build adds class-instance arguments; the
 * D3D10 build adds nothing, so the macros expand to nothing.
 */
#if API >= 11
#define SET_SHADER_EXTRA_ARGS , \
	ID3D11ClassInstance *const *ppClassInstances, \
	unsigned count
#define GET_SHADER_EXTRA_ARGS , \
		ID3D11ClassInstance **ppClassInstances, \
		unsigned *out_count
#else
#define SET_SHADER_EXTRA_ARGS
#define GET_SHADER_EXTRA_ARGS
#endif
321
322/* On Windows D3D11, SetConstantBuffers and SetShaderResources crash if passed a null pointer.
323 * Instead, you have to pass a pointer to nulls to unbind things.
324 * We do the same.
325 * TODO: is D3D10 the same?
326 */
327	template<unsigned s>
328	void xs_set_shader(GalliumD3D11Shader<>* shader)
329	{
330		if(shader != shaders[s].p)
331		{
332			shaders[s] = shader;
333			void* shader_cso = shader ? shader->object : default_shaders[s];
334			switch(s)
335			{
336			case PIPE_SHADER_VERTEX:
337				pipe->bind_vs_state(pipe, shader_cso);
338				break;
339			case PIPE_SHADER_FRAGMENT:
340				pipe->bind_fs_state(pipe, shader_cso);
341				break;
342			case PIPE_SHADER_GEOMETRY:
343				pipe->bind_gs_state(pipe, shader_cso);
344				break;
345			}
346			update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s)) | (1 << (UPDATE_VIEWS_SHIFT + s));
347		}
348	}
349
350	template<unsigned s>
351	void xs_set_constant_buffers(unsigned start, unsigned count, GalliumD3D11Buffer *const *constbufs)
352	{
353		for(unsigned i = 0; i < count; ++i)
354		{
355			if(constbufs[i] != constant_buffers[s][start + i].p)
356			{
357				constant_buffers[s][start + i] = constbufs[i];
358				if(s < caps.stages && start + i < caps.constant_buffers[s])
359					pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL);
360			}
361		}
362	}
363
364	template<unsigned s>
365	void xs_set_shader_resources(unsigned start, unsigned count, GalliumD3D11ShaderResourceView *const *srvs)
366	{
367		int last_different = -1;
368		for(unsigned i = 0; i < count; ++i)
369		{
370			if(shader_resource_views[s][start + i].p != srvs[i])
371			{
372				shader_resource_views[s][start + i] = srvs[i];
373				sampler_views[s][start + i] = srvs[i] ? srvs[i]->object : 0;
374				last_different = i;
375			}
376		}
377		if(last_different >= 0)
378		{
379			num_shader_resource_views[s] = std::max(num_shader_resource_views[s], start + last_different + 1);
380			update_flags |= 1 << (UPDATE_VIEWS_SHIFT + s);
381		}
382	}
383
384	template<unsigned s>
385	void xs_set_samplers(unsigned start, unsigned count, GalliumD3D11SamplerState *const *samps)
386	{
387		int last_different = -1;
388		for(unsigned i = 0; i < count; ++i)
389		{
390			if(samplers[s][start + i].p != samps[i])
391			{
392				samplers[s][start + i] = samps[i];
393				sampler_csos[s].v[start + i] = samps[i] ? samps[i]->object : default_sampler;
394				last_different = i;
395			}
396			if(last_different >= 0)
397			{
398				num_samplers[s] = std::max(num_samplers[s], start + last_different + 1);
399				update_flags |= 1 << (UPDATE_SAMPLERS_SHIFT + s);
400			}
401		}
402	}
403
/*
 * Generates the eight per-stage virtual methods for shader stage XS
 * (XS = VS, GS, PS, ...; Stage = Vertex, Geometry, Pixel, ...):
 *
 *   XS##SetShader / XS##GetShader
 *   XS##SetConstantBuffers / XS##GetConstantBuffers
 *   XS##SetShaderResources / XS##GetShaderResources
 *   XS##SetSamplers / XS##GetSamplers
 *
 * Every method takes the SYNCHRONIZED lock first. Setters cast the interface
 * pointers to the Gallium implementation types and forward to the matching
 * xs_set_* template instantiated with D3D11_STAGE_##XS. Getters copy out the
 * tracked bindings through refcnt_ptr::ref().
 *
 * The class-instance arguments added by SET/GET_SHADER_EXTRA_ARGS are not
 * used by the generated bodies.
 */
#define IMPLEMENT_SHADER_STAGE(XS, Stage) \
	virtual void STDMETHODCALLTYPE XS##SetShader( \
		ID3D11##Stage##Shader *pShader \
		SET_SHADER_EXTRA_ARGS) \
	{ \
		SYNCHRONIZED; \
		xs_set_shader<D3D11_STAGE_##XS>((GalliumD3D11Shader<>*)pShader); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetShader(\
		ID3D11##Stage##Shader **ppShader \
		GET_SHADER_EXTRA_ARGS) \
	{ \
		SYNCHRONIZED; \
		*ppShader = (ID3D11##Stage##Shader*)shaders[D3D11_STAGE_##XS].ref(); \
	} \
	virtual void STDMETHODCALLTYPE XS##SetConstantBuffers(\
		unsigned start, \
		unsigned count, \
		ID3D11Buffer *const* constant_buffers) \
	{ \
		SYNCHRONIZED; \
		xs_set_constant_buffers<D3D11_STAGE_##XS>(start, count, (GalliumD3D11Buffer *const *)constant_buffers); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetConstantBuffers(\
		unsigned start, \
		unsigned count, \
		ID3D11Buffer **out_constant_buffers) \
	{ \
		SYNCHRONIZED; \
		for(unsigned i = 0; i < count; ++i) \
			out_constant_buffers[i] = constant_buffers[D3D11_STAGE_##XS][start + i].ref(); \
	} \
	virtual void STDMETHODCALLTYPE XS##SetShaderResources(\
		unsigned start, \
		unsigned count, \
		ID3D11ShaderResourceView *const *new_shader_resource_views) \
	{ \
		SYNCHRONIZED; \
		xs_set_shader_resources<D3D11_STAGE_##XS>(start, count, (GalliumD3D11ShaderResourceView *const *)new_shader_resource_views); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetShaderResources(\
		unsigned start, \
		unsigned count, \
		ID3D11ShaderResourceView **out_shader_resource_views) \
	{ \
		SYNCHRONIZED; \
		for(unsigned i = 0; i < count; ++i) \
			out_shader_resource_views[i] = shader_resource_views[D3D11_STAGE_##XS][start + i].ref(); \
	} \
	virtual void STDMETHODCALLTYPE XS##SetSamplers(\
		unsigned start, \
		unsigned count, \
		ID3D11SamplerState *const *new_samplers) \
	{ \
		SYNCHRONIZED; \
		xs_set_samplers<D3D11_STAGE_##XS>(start, count, (GalliumD3D11SamplerState *const *)new_samplers); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetSamplers( \
		unsigned start, \
		unsigned count, \
		ID3D11SamplerState **out_samplers) \
	{ \
		SYNCHRONIZED; \
		for(unsigned i = 0; i < count; ++i) \
			out_samplers[i] = samplers[D3D11_STAGE_##XS][start + i].ref(); \
	}
470
/*
 * Per-stage statement wrappers: VS and PS run `x` unconditionally, GS runs it
 * only when caps.gs is set, and HS/DS/CS discard it.
 * NOTE(review): no use of these macros is visible in this part of the file.
 */
#define DO_VS(x) x
#define DO_GS(x) do {if(caps.gs) {x;}} while(0)
#define DO_PS(x) x
#define DO_HS(x)
#define DO_DS(x)
#define DO_CS(x)
	// Stages available in both the D3D10 and the D3D11 build.
	IMPLEMENT_SHADER_STAGE(VS, Vertex)
	IMPLEMENT_SHADER_STAGE(GS, Geometry)
	IMPLEMENT_SHADER_STAGE(PS, Pixel)
480
481#if API >= 11
482	IMPLEMENT_SHADER_STAGE(HS, Hull)
483	IMPLEMENT_SHADER_STAGE(DS, Domain)
484	IMPLEMENT_SHADER_STAGE(CS, Compute)
485
486	virtual void STDMETHODCALLTYPE CSSetUnorderedAccessViews(
487		unsigned start,
488		unsigned count,
489		ID3D11UnorderedAccessView *const *new_unordered_access_views,
490		const unsigned *new_uav_initial_counts)
491	{
492		SYNCHRONIZED;
493		for(unsigned i = 0; i < count; ++i)
494			cs_unordered_access_views[start + i] = new_unordered_access_views[i];
495	}
496
497	virtual void STDMETHODCALLTYPE CSGetUnorderedAccessViews(
498		unsigned start,
499		unsigned count,
500		ID3D11UnorderedAccessView **out_unordered_access_views)
501	{
502		SYNCHRONIZED;
503		for(unsigned i = 0; i < count; ++i)
504			out_unordered_access_views[i] = cs_unordered_access_views[start + i].ref();
505	}
506#endif
507
508	template<unsigned s>
509	void update_stage()
510	{
511		if(update_flags & (1 << (UPDATE_VIEWS_SHIFT + s)))
512		{
513			while(num_shader_resource_views[s] && !sampler_views[s][num_shader_resource_views[s] - 1]) \
514				--num_shader_resource_views[s];
515			if((1 << s) & caps.stages_with_sampling)
516			{
517				struct pipe_sampler_view* views_to_bind[PIPE_MAX_SAMPLERS];
518				unsigned num_views_to_bind = shaders[s] ? shaders[s]->slot_to_resource.size() : 0;
519				for(unsigned i = 0; i < num_views_to_bind; ++i)
520				{
521					views_to_bind[i] = sampler_views[s][shaders[s]->slot_to_resource[i]];
522				}
523				switch(s)
524				{
525				case PIPE_SHADER_VERTEX:
526					pipe->set_vertex_sampler_views(pipe, num_views_to_bind, views_to_bind);
527					break;
528				case PIPE_SHADER_FRAGMENT:
529					pipe->set_fragment_sampler_views(pipe, num_views_to_bind, views_to_bind);
530					break;
531				case PIPE_SHADER_GEOMETRY:
532					pipe->set_geometry_sampler_views(pipe, num_views_to_bind, views_to_bind);
533					break;
534				}
535			}
536		}
537
538		if(update_flags & (1 << (UPDATE_SAMPLERS_SHIFT + s)))
539		{
540			while(num_samplers[s] && !sampler_csos[s].v[num_samplers[s] - 1])
541				--num_samplers[s];
542			if((1 << s) & caps.stages_with_sampling)
543			{
544				void* samplers_to_bind[PIPE_MAX_SAMPLERS];
545				unsigned num_samplers_to_bind = shaders[s] ? shaders[s]->slot_to_sampler.size() : 0;
546				for(unsigned i = 0; i < num_samplers_to_bind; ++i)
547				{
548					// index can be -1 to access sampler_csos[s].ld
549					samplers_to_bind[i] = *(sampler_csos[s].v + shaders[s]->slot_to_sampler[i]);
550				}
551				switch(s)
552				{
553				case PIPE_SHADER_VERTEX:
554					pipe->bind_vertex_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
555					break;
556				case PIPE_SHADER_FRAGMENT:
557					pipe->bind_fragment_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
558					break;
559				case PIPE_SHADER_GEOMETRY:
560					pipe->bind_geometry_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
561					break;
562				}
563			}
564		}
565	}
566
567	void update_state()
568	{
569		update_stage<D3D11_STAGE_PS>();
570		update_stage<D3D11_STAGE_VS>();
571		update_stage<D3D11_STAGE_GS>();
572#if API >= 11
573		update_stage<D3D11_STAGE_HS>();
574		update_stage<D3D11_STAGE_DS>();
575		update_stage<D3D11_STAGE_CS>();
576#endif
577
578		if(update_flags & UPDATE_VERTEX_BUFFERS)
579		{
580			while(num_vertex_buffers && !vertex_buffers[num_vertex_buffers - 1].buffer)
581				--num_vertex_buffers;
582			pipe->set_vertex_buffers(pipe, num_vertex_buffers, vertex_buffers);
583		}
584
585		update_flags = 0;
586	}
587
588	virtual void STDMETHODCALLTYPE IASetInputLayout(
589		ID3D11InputLayout *new_input_layout)
590	{
591		SYNCHRONIZED;
592		if(new_input_layout != input_layout.p)
593		{
594			input_layout = new_input_layout;
595			pipe->bind_vertex_elements_state(pipe, new_input_layout ? ((GalliumD3D11InputLayout*)new_input_layout)->object : default_input_layout);
596		}
597	}
598
599	virtual void STDMETHODCALLTYPE IAGetInputLayout(
600		ID3D11InputLayout **out_input_layout)
601	{
602		SYNCHRONIZED;
603		*out_input_layout = input_layout.ref();
604	}
605
606	virtual void STDMETHODCALLTYPE IASetVertexBuffers(
607		unsigned start,
608		unsigned count,
609		ID3D11Buffer *const *new_vertex_buffers,
610		const unsigned *new_strides,
611		const unsigned *new_offsets)
612	{
613		SYNCHRONIZED;
614		int last_different = -1;
615		for(unsigned i = 0; i < count; ++i)
616		{
617			ID3D11Buffer* buffer = new_vertex_buffers[i];
618			if(buffer != input_buffers[start + i].p
619				|| vertex_buffers[start + i].buffer_offset != new_offsets[i]
620				|| vertex_buffers[start + i].stride != new_offsets[i]
621			)
622			{
623				input_buffers[start + i] = buffer;
624				vertex_buffers[start + i].buffer = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
625				vertex_buffers[start + i].buffer_offset = new_offsets[i];
626				vertex_buffers[start + i].stride = new_strides[i];
627				last_different = i;
628			}
629		}
630		if(last_different >= 0)
631		{
632			num_vertex_buffers = std::max(num_vertex_buffers, start + count);
633			update_flags |= UPDATE_VERTEX_BUFFERS;
634		}
635	}
636
637	virtual void STDMETHODCALLTYPE IAGetVertexBuffers(
638		unsigned start,
639		unsigned count,
640		ID3D11Buffer **out_vertex_buffers,
641		unsigned *out_strides,
642		unsigned *out_offsets)
643	{
644		SYNCHRONIZED;
645		if(out_vertex_buffers)
646		{
647			for(unsigned i = 0; i < count; ++i)
648				out_vertex_buffers[i] = input_buffers[start + i].ref();
649		}
650
651		if(out_offsets)
652		{
653			for(unsigned i = 0; i < count; ++i)
654				out_offsets[i] = vertex_buffers[start + i].buffer_offset;
655		}
656
657		if(out_strides)
658		{
659			for(unsigned i = 0; i < count; ++i)
660				out_strides[i] = vertex_buffers[start + i].stride;
661		}
662	}
663
664	void set_index_buffer()
665	{
666		pipe_index_buffer ib;
667		if(!index_buffer)
668		{
669			memset(&ib, 0, sizeof(ib));
670		}
671		else
672		{
673			if(index_format == DXGI_FORMAT_R32_UINT)
674				ib.index_size = 4;
675			else if(index_format == DXGI_FORMAT_R16_UINT)
676				ib.index_size = 2;
677			else
678				ib.index_size = 1;
679			ib.offset = index_offset;
680			ib.buffer = index_buffer ? ((GalliumD3D11Buffer*)index_buffer.p)->resource : 0;
681		}
682		pipe->set_index_buffer(pipe, &ib);
683	}
684
685	virtual void STDMETHODCALLTYPE IASetIndexBuffer(
686		ID3D11Buffer *new_index_buffer,
687		DXGI_FORMAT new_index_format,
688		unsigned new_index_offset)
689	{
690		SYNCHRONIZED;
691		if(index_buffer.p != new_index_buffer || index_format != new_index_format || index_offset != new_index_offset)
692		{
693			index_buffer = new_index_buffer;
694			index_format = new_index_format;
695			index_offset = new_index_offset;
696
697			set_index_buffer();
698		}
699	}
700
701	virtual void STDMETHODCALLTYPE IAGetIndexBuffer(
702		ID3D11Buffer **out_index_buffer,
703		DXGI_FORMAT *out_index_format,
704		unsigned *out_index_offset)
705	{
706		SYNCHRONIZED;
707		if(out_index_buffer)
708			*out_index_buffer = index_buffer.ref();
709		if(out_index_format)
710			*out_index_format = index_format;
711		if(out_index_offset)
712			*out_index_offset = index_offset;
713	}
714
715	virtual void STDMETHODCALLTYPE IASetPrimitiveTopology(
716		D3D11_PRIMITIVE_TOPOLOGY new_primitive_topology)
717	{
718		SYNCHRONIZED;
719		if(primitive_topology != new_primitive_topology)
720		{
721			if(new_primitive_topology < D3D_PRIMITIVE_TOPOLOGY_COUNT)
722				primitive_mode = d3d_to_pipe_prim[new_primitive_topology];
723			else
724				primitive_mode = 0;
725			primitive_topology = new_primitive_topology;
726		}
727	}
728
729	virtual void STDMETHODCALLTYPE IAGetPrimitiveTopology(
730		D3D11_PRIMITIVE_TOPOLOGY *out_primitive_topology)
731	{
732		SYNCHRONIZED;
733		*out_primitive_topology = primitive_topology;
734	}
735
736	virtual void STDMETHODCALLTYPE DrawIndexed(
737		unsigned index_count,
738		unsigned start_index_location,
739		int base_vertex_location)
740	{
741		SYNCHRONIZED;
742		if(update_flags)
743			update_state();
744
745		pipe_draw_info info;
746		info.mode = primitive_mode;
747		info.indexed = TRUE;
748		info.count = index_count;
749		info.start = start_index_location;
750		info.index_bias = base_vertex_location;
751		info.min_index = 0;
752		info.max_index = ~0;
753		info.start_instance = 0;
754		info.instance_count = 1;
755
756		pipe->draw_vbo(pipe, &info);
757	}
758
759	virtual void STDMETHODCALLTYPE Draw(
760		unsigned vertex_count,
761		unsigned start_vertex_location)
762	{
763		SYNCHRONIZED;
764		if(update_flags)
765			update_state();
766
767		pipe_draw_info info;
768		info.mode = primitive_mode;
769		info.indexed = FALSE;
770		info.count = vertex_count;
771		info.start = start_vertex_location;
772		info.index_bias = 0;
773		info.min_index = 0;
774		info.max_index = ~0;
775		info.start_instance = 0;
776		info.instance_count = 1;
777
778		pipe->draw_vbo(pipe, &info);
779	}
780
781	virtual void STDMETHODCALLTYPE DrawIndexedInstanced(
782		unsigned index_countPerInstance,
783		unsigned instance_count,
784		unsigned start_index_location,
785		int base_vertex_location,
786		unsigned start_instance_location)
787	{
788		SYNCHRONIZED;
789		if(update_flags)
790			update_state();
791
792		pipe_draw_info info;
793		info.mode = primitive_mode;
794		info.indexed = TRUE;
795		info.count = index_countPerInstance;
796		info.start = start_index_location;
797		info.index_bias = base_vertex_location;
798		info.min_index = 0;
799		info.max_index = ~0;
800		info.start_instance = start_instance_location;
801		info.instance_count = instance_count;
802
803		pipe->draw_vbo(pipe, &info);
804	}
805
806	virtual void STDMETHODCALLTYPE DrawInstanced(
807		unsigned vertex_countPerInstance,
808		unsigned instance_count,
809		unsigned start_vertex_location,
810		unsigned start_instance_location)
811	{
812		SYNCHRONIZED;
813		if(update_flags)
814			update_state();
815
816		pipe_draw_info info;
817		info.mode = primitive_mode;
818		info.indexed = FALSE;
819		info.count = vertex_countPerInstance;
820		info.start = start_vertex_location;
821		info.index_bias = 0;
822		info.min_index = 0;
823		info.max_index = ~0;
824		info.start_instance = start_instance_location;
825		info.instance_count = instance_count;
826
827		pipe->draw_vbo(pipe, &info);
828	}
829
830	virtual void STDMETHODCALLTYPE DrawAuto(void)
831	{
832		if(!caps.so)
833			return;
834
835		SYNCHRONIZED;
836		if(update_flags)
837			update_state();
838
839		pipe->draw_stream_output(pipe, primitive_mode);
840	}
841
842	virtual void STDMETHODCALLTYPE DrawIndexedInstancedIndirect(
843		ID3D11Buffer *buffer,
844		unsigned aligned_byte_offset)
845	{
846		SYNCHRONIZED;
847		if(update_flags)
848			update_state();
849
850		struct {
851			unsigned count;
852			unsigned instance_count;
853			unsigned start;
854			unsigned index_bias;
855		} data;
856
857		pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data);
858
859		pipe_draw_info info;
860		info.mode = primitive_mode;
861		info.indexed = TRUE;
862		info.start = data.start;
863		info.count = data.count;
864		info.index_bias = data.index_bias;
865		info.min_index = 0;
866		info.max_index = ~0;
867		info.start_instance = 0;
868		info.instance_count = data.instance_count;
869
870		pipe->draw_vbo(pipe, &info);
871	}
872
873	virtual void STDMETHODCALLTYPE DrawInstancedIndirect(
874		ID3D11Buffer *buffer,
875		unsigned aligned_byte_offset)
876	{
877		SYNCHRONIZED;
878		if(update_flags)
879			update_state();
880
881		struct {
882			unsigned count;
883			unsigned instance_count;
884			unsigned start;
885		} data;
886
887		pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data);
888
889		pipe_draw_info info;
890		info.mode = primitive_mode;
891		info.indexed = FALSE;
892		info.start = data.start;
893		info.count = data.count;
894		info.index_bias = 0;
895		info.min_index = 0;
896		info.max_index = ~0;
897		info.start_instance = 0;
898		info.instance_count = data.instance_count;
899
900		pipe->draw_vbo(pipe, &info);
901	}
902
903#if API >= 11
	/*
	 * Compute dispatch: not implemented. The call is accepted and ignored;
	 * none of the thread group counts are read.
	 */
	virtual void STDMETHODCALLTYPE Dispatch(
		unsigned thread_group_count_x,
		unsigned thread_group_count_y,
		unsigned thread_group_count_z)
	{
// uncomment this when this is implemented
//		SYNCHRONIZED;
//		if(update_flags)
//			update_state();
	}
914
	/*
	 * Indirect compute dispatch: not implemented. The call is accepted and
	 * ignored; neither argument is read.
	 */
	virtual void STDMETHODCALLTYPE DispatchIndirect(
		ID3D11Buffer *buffer,
		unsigned aligned_byte_offset)
	{
// uncomment this when this is implemented
//		SYNCHRONIZED;
//		if(update_flags)
//			update_state();
	}
924#endif
925
926	void set_clip()
927	{
928		pipe_clip_state clip;
929		clip.nr = 0;
930		clip.depth_clamp = depth_clamp;
931		pipe->set_clip_state(pipe, &clip);
932	}
933
934	virtual void STDMETHODCALLTYPE RSSetState(
935		ID3D11RasterizerState *new_rasterizer_state)
936	{
937		SYNCHRONIZED;
938		if(new_rasterizer_state != rasterizer_state.p)
939		{
940			rasterizer_state = new_rasterizer_state;
941			pipe->bind_rasterizer_state(pipe, new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->object : default_rasterizer);
942			bool new_depth_clamp = new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->depth_clamp : false;
943			if(depth_clamp != new_depth_clamp)
944			{
945				depth_clamp = new_depth_clamp;
946				set_clip();
947			}
948		}
949	}
950
951	virtual void STDMETHODCALLTYPE RSGetState(
952		ID3D11RasterizerState **out_rasterizer_state)
953	{
954		SYNCHRONIZED;
955		*out_rasterizer_state = rasterizer_state.ref();
956	}
957
958	void set_viewport()
959	{
960		// TODO: is depth correct? it seems D3D10/11 uses a [-1,1]x[-1,1]x[0,1] cube
961		pipe_viewport_state viewport;
962		float half_width = viewports[0].Width * 0.5f;
963		float half_height = viewports[0].Height * 0.5f;
964
965		viewport.scale[0] = half_width;
966		viewport.scale[1] = -half_height;
967		viewport.scale[2] = (viewports[0].MaxDepth - viewports[0].MinDepth);
968		viewport.scale[3] = 1.0f;
969		viewport.translate[0] = half_width + viewports[0].TopLeftX;
970		viewport.translate[1] = half_height + viewports[0].TopLeftY;
971		viewport.translate[2] = viewports[0].MinDepth;
972		viewport.translate[3] = 1.0f;
973		pipe->set_viewport_state(pipe, &viewport);
974	}
975
976	virtual void STDMETHODCALLTYPE RSSetViewports(
977		unsigned count,
978		const D3D11_VIEWPORT *new_viewports)
979	{
980		SYNCHRONIZED;
981		if(count)
982		{
983			if(memcmp(&viewports[0], &new_viewports[0], sizeof(viewports[0])))
984			{
985				viewports[0] = new_viewports[0];
986				set_viewport();
987			}
988			for(unsigned i = 1; i < count; ++i)
989				viewports[i] = new_viewports[i];
990		}
991		else if(num_viewports)
992		{
993			// TODO: what should we do here?
994			memset(&viewports[0], 0, sizeof(viewports[0]));
995			set_viewport();
996		}
997		num_viewports = count;
998	}
999
1000	virtual void STDMETHODCALLTYPE RSGetViewports(
1001		unsigned *out_count,
1002		D3D11_VIEWPORT *out_viewports)
1003	{
1004		SYNCHRONIZED;
1005		if(out_viewports)
1006		{
1007			unsigned i;
1008			for(i = 0; i < std::min(*out_count, num_viewports); ++i)
1009				out_viewports[i] = viewports[i];
1010
1011			memset(out_viewports + i, 0, (*out_count - i) * sizeof(D3D11_VIEWPORT));
1012		}
1013
1014		*out_count = num_viewports;
1015	}
1016
1017	void set_scissor()
1018	{
1019		pipe_scissor_state scissor;
1020		scissor.minx = scissor_rects[0].left;
1021		scissor.miny = scissor_rects[0].top;
1022		scissor.maxx = scissor_rects[0].right;
1023		scissor.maxy = scissor_rects[0].bottom;
1024		pipe->set_scissor_state(pipe, &scissor);
1025	}
1026
1027	virtual void STDMETHODCALLTYPE RSSetScissorRects(
1028		unsigned count,
1029		const D3D11_RECT *new_rects)
1030	{
1031		SYNCHRONIZED;
1032		if(count)
1033		{
1034			if(memcmp(&scissor_rects[0], &new_rects[0], sizeof(scissor_rects[0])))
1035			{
1036				scissor_rects[0] = new_rects[0];
1037				set_scissor();
1038			}
1039			for(unsigned i = 1; i < count; ++i)
1040				scissor_rects[i] = new_rects[i];
1041		}
1042		else if(num_scissor_rects)
1043		{
1044			// TODO: what should we do here?
1045			memset(&scissor_rects[0], 0, sizeof(scissor_rects[0]));
1046			set_scissor();
1047		}
1048
1049		num_scissor_rects = count;
1050	}
1051
1052	virtual void STDMETHODCALLTYPE RSGetScissorRects(
1053		unsigned *out_count,
1054		D3D11_RECT *out_rects)
1055	{
1056		SYNCHRONIZED;
1057		if(out_rects)
1058		{
1059			unsigned i;
1060			for(i = 0; i < std::min(*out_count, num_scissor_rects); ++i)
1061				out_rects[i] = scissor_rects[i];
1062
1063			memset(out_rects + i, 0, (*out_count - i) * sizeof(D3D11_RECT));
1064		}
1065
1066		*out_count = num_scissor_rects;
1067	}
1068
1069	virtual void STDMETHODCALLTYPE OMSetBlendState(
1070		ID3D11BlendState *new_blend_state,
1071		const float new_blend_factor[4],
1072		unsigned new_sample_mask)
1073	{
1074		SYNCHRONIZED;
1075		float white[4] = {1.0f, 1.0f, 1.0f, 1.0f};
1076
1077		if(blend_state.p != new_blend_state)
1078		{
1079			pipe->bind_blend_state(pipe, new_blend_state ? ((GalliumD3D11BlendState*)new_blend_state)->object : default_blend);
1080			blend_state = new_blend_state;
1081		}
1082
1083		// Windows D3D11 does this, even though it's apparently undocumented
1084		if(!new_blend_factor)
1085			new_blend_factor = white;
1086
1087		if(memcmp(blend_color, new_blend_factor, sizeof(blend_color)))
1088		{
1089			pipe->set_blend_color(pipe, (struct pipe_blend_color*)new_blend_factor);
1090			memcpy(blend_color, new_blend_factor, sizeof(blend_color));
1091		}
1092
1093		if(sample_mask != new_sample_mask)
1094		{
1095			pipe->set_sample_mask(pipe, new_sample_mask);
1096			sample_mask = new_sample_mask;
1097		}
1098	}
1099
1100	virtual void STDMETHODCALLTYPE OMGetBlendState(
1101		ID3D11BlendState **out_blend_state,
1102		float out_blend_factor[4],
1103		unsigned *out_sample_mask)
1104	{
1105		SYNCHRONIZED;
1106		if(out_blend_state)
1107			*out_blend_state = blend_state.ref();
1108		if(out_blend_factor)
1109			memcpy(out_blend_factor, blend_color, sizeof(blend_color));
1110		if(out_sample_mask)
1111			*out_sample_mask = sample_mask;
1112	}
1113
1114	void set_stencil_ref()
1115	{
1116		struct pipe_stencil_ref sref;
1117		sref.ref_value[0] = stencil_ref;
1118		sref.ref_value[1] = stencil_ref;
1119		pipe->set_stencil_ref(pipe, &sref);
1120	}
1121
1122	virtual void STDMETHODCALLTYPE OMSetDepthStencilState(
1123		ID3D11DepthStencilState *new_depth_stencil_state,
1124		unsigned new_stencil_ref)
1125	{
1126		SYNCHRONIZED;
1127		if(new_depth_stencil_state != depth_stencil_state.p)
1128		{
1129			pipe->bind_depth_stencil_alpha_state(pipe, new_depth_stencil_state ? ((GalliumD3D11DepthStencilState*)new_depth_stencil_state)->object : default_depth_stencil);
1130			depth_stencil_state = new_depth_stencil_state;
1131		}
1132
1133		if(new_stencil_ref != stencil_ref)
1134		{
1135			stencil_ref = new_stencil_ref;
1136			set_stencil_ref();
1137		}
1138	}
1139
1140	virtual void STDMETHODCALLTYPE OMGetDepthStencilState(
1141		ID3D11DepthStencilState **out_depth_stencil_state,
1142		unsigned *out_stencil_ref)
1143	{
1144		SYNCHRONIZED;
1145		if(*out_depth_stencil_state)
1146			*out_depth_stencil_state = depth_stencil_state.ref();
1147		if(out_stencil_ref)
1148			*out_stencil_ref = stencil_ref;
1149	}
1150
1151	void set_framebuffer()
1152	{
1153		struct pipe_framebuffer_state fb;
1154		memset(&fb, 0, sizeof(fb));
1155		if(depth_stencil_view)
1156		{
1157			struct pipe_surface* surf = ((GalliumD3D11DepthStencilView*)depth_stencil_view.p)->object;
1158			fb.zsbuf = surf;
1159			if(surf->width > fb.width)
1160				fb.width = surf->width;
1161			if(surf->height > fb.height)
1162				fb.height = surf->height;
1163		}
1164		fb.nr_cbufs = num_render_target_views;
1165		unsigned i;
1166		for(i = 0; i < num_render_target_views; ++i)
1167		{
1168			if(render_target_views[i])
1169			{
1170				struct pipe_surface* surf = ((GalliumD3D11RenderTargetView*)render_target_views[i].p)->object;
1171				fb.cbufs[i] = surf;
1172				if(surf->width > fb.width)
1173					fb.width = surf->width;
1174				if(surf->height > fb.height)
1175					fb.height = surf->height;
1176			}
1177		}
1178
1179		pipe->set_framebuffer_state(pipe, &fb);
1180	}
1181
1182	/* TODO: the docs say that we should unbind conflicting resources (e.g. those bound for read while we are binding them for write too), but we aren't.
1183	 * Hopefully nobody relies on this happening
1184	 */
1185
1186	virtual void STDMETHODCALLTYPE OMSetRenderTargets(
1187		unsigned count,
1188		ID3D11RenderTargetView *const *new_render_target_views,
1189		ID3D11DepthStencilView  *new_depth_stencil_view)
1190	{
1191		SYNCHRONIZED;
1192		if(!new_render_target_views)
1193			count = 0;
1194		if(count == num_render_target_views)
1195		{
1196			for(unsigned i = 0; i < count; ++i)
1197			{
1198				if(new_render_target_views[i] != render_target_views[i].p)
1199					goto changed;
1200			}
1201			return;
1202		}
1203changed:
1204		depth_stencil_view = new_depth_stencil_view;
1205		unsigned i;
1206		for(i = 0; i < count; ++i)
1207		{
1208			render_target_views[i] = new_render_target_views[i];
1209#if API >= 11
1210			om_unordered_access_views[i] = (ID3D11UnorderedAccessView*)NULL;
1211#endif
1212		}
1213		for(; i < num_render_target_views; ++i)
1214			render_target_views[i] = (ID3D11RenderTargetView*)NULL;
1215		num_render_target_views = count;
1216		set_framebuffer();
1217	}
1218
1219	virtual void STDMETHODCALLTYPE OMGetRenderTargets(
1220		unsigned count,
1221		ID3D11RenderTargetView **out_render_target_views,
1222		ID3D11DepthStencilView  **out_depth_stencil_view)
1223	{
1224		SYNCHRONIZED;
1225		if(out_render_target_views)
1226		{
1227			unsigned i;
1228			for(i = 0; i < std::min(num_render_target_views, count); ++i)
1229				out_render_target_views[i] = render_target_views[i].ref();
1230
1231			for(; i < count; ++i)
1232				out_render_target_views[i] = 0;
1233		}
1234
1235		if(out_depth_stencil_view)
1236			*out_depth_stencil_view = depth_stencil_view.ref();
1237	}
1238
1239#if API >= 11
1240	/* TODO: what is this supposed to do _exactly_? are we doing the right thing? */
1241	virtual void STDMETHODCALLTYPE OMSetRenderTargetsAndUnorderedAccessViews(
1242		unsigned rtv_count,
1243		ID3D11RenderTargetView *const *new_render_target_views,
1244		ID3D11DepthStencilView  *new_depth_stencil_view,
1245		unsigned uav_start,
1246		unsigned uav_count,
1247		ID3D11UnorderedAccessView *const *new_unordered_access_views,
1248		const unsigned *new_uav_initial_counts)
1249	{
1250		SYNCHRONIZED;
1251		if(rtv_count != D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL)
1252			OMSetRenderTargets(rtv_count, new_render_target_views, new_depth_stencil_view);
1253
1254		if(uav_count != D3D11_KEEP_UNORDERED_ACCESS_VIEWS)
1255		{
1256			for(unsigned i = 0; i < uav_count; ++i)
1257			{
1258				om_unordered_access_views[uav_start + i] = new_unordered_access_views[i];
1259				render_target_views[uav_start + i] = (ID3D11RenderTargetView*)0;
1260			}
1261		}
1262	}
1263
1264	virtual void STDMETHODCALLTYPE OMGetRenderTargetsAndUnorderedAccessViews(
1265		unsigned rtv_count,
1266		ID3D11RenderTargetView **out_render_target_views,
1267		ID3D11DepthStencilView  **out_depth_stencil_view,
1268		unsigned uav_start,
1269		unsigned uav_count,
1270		ID3D11UnorderedAccessView **out_unordered_access_views)
1271	{
1272		SYNCHRONIZED;
1273		if(out_render_target_views)
1274			OMGetRenderTargets(rtv_count, out_render_target_views, out_depth_stencil_view);
1275
1276		if(out_unordered_access_views)
1277		{
1278			for(unsigned i = 0; i < uav_count; ++i)
1279				out_unordered_access_views[i] = om_unordered_access_views[uav_start + i].ref();
1280		}
1281	}
1282#endif
1283
1284	virtual void STDMETHODCALLTYPE SOSetTargets(
1285		unsigned count,
1286		ID3D11Buffer *const *new_so_targets,
1287		const unsigned *new_offsets)
1288	{
1289		SYNCHRONIZED;
1290		unsigned i;
1291		if(!new_so_targets)
1292			count = 0;
1293		bool changed = false;
1294		for(i = 0; i < count; ++i)
1295		{
1296			ID3D11Buffer* buffer = new_so_targets[i];
1297			if(buffer != so_targets[i].p || new_offsets[i] != so_offsets[i])
1298			{
1299				so_buffers[i] = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
1300				so_targets[i] = buffer;
1301				so_offsets[i] = new_offsets[i];
1302				changed = true;
1303			}
1304		}
1305		for(; i < D3D11_SO_BUFFER_SLOT_COUNT; ++i)
1306		{
1307			if(so_targets[i].p || so_offsets[i])
1308			{
1309				changed = true;
1310				so_targets[i] = (ID3D11Buffer*)0;
1311				so_offsets[i] = 0;
1312			}
1313		}
1314		num_so_targets = count;
1315
1316		if(changed && caps.so)
1317			pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1318	}
1319
1320	virtual void STDMETHODCALLTYPE SOGetTargets(
1321		unsigned count,
1322		ID3D11Buffer **out_so_targets
1323#if API < 11
1324		, UINT *out_offsets
1325#endif
1326		)
1327	{
1328		SYNCHRONIZED;
1329		for(unsigned i = 0; i < count; ++i)
1330		{
1331			out_so_targets[i] = so_targets[i].ref();
1332#if API < 11
1333			out_offsets[i] = so_offsets[i];
1334#endif
1335		}
1336	}
1337
1338	virtual void STDMETHODCALLTYPE Begin(
1339		ID3D11Asynchronous *async)
1340	{
1341		SYNCHRONIZED;
1342		if(caps.queries)
1343			pipe->begin_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query);
1344	}
1345
1346	virtual void STDMETHODCALLTYPE End(
1347		ID3D11Asynchronous *async)
1348	{
1349		SYNCHRONIZED;
1350		if(caps.queries)
1351			pipe->end_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query);
1352	}
1353
1354	virtual HRESULT STDMETHODCALLTYPE GetData(
1355		ID3D11Asynchronous *iasync,
1356		void *out_data,
1357		unsigned data_size,
1358		unsigned get_data_flags)
1359	{
1360		SYNCHRONIZED;
1361		if(!caps.queries)
1362			return E_NOTIMPL;
1363
1364		GalliumD3D11Asynchronous<>* async = (GalliumD3D11Asynchronous<>*)iasync;
1365		void* tmp_data = alloca(async->data_size);
1366		boolean ret = pipe->get_query_result(pipe, async->query, !(get_data_flags & D3D11_ASYNC_GETDATA_DONOTFLUSH), tmp_data);
1367		if(out_data)
1368			memcpy(out_data, tmp_data, std::min(async->data_size, data_size));
1369		return ret ? S_OK : S_FALSE;
1370	}
1371
1372	void set_render_condition()
1373	{
1374		if(caps.render_condition)
1375		{
1376			if(!render_predicate)
1377				pipe->render_condition(pipe, 0, 0);
1378			else
1379			{
1380				GalliumD3D11Predicate* predicate = (GalliumD3D11Predicate*)render_predicate.p;
1381				if(!render_predicate_value && predicate->desc.Query == D3D11_QUERY_OCCLUSION_PREDICATE)
1382				{
1383					unsigned mode = (predicate->desc.MiscFlags & D3D11_QUERY_MISC_PREDICATEHINT) ? PIPE_RENDER_COND_NO_WAIT : PIPE_RENDER_COND_WAIT;
1384					pipe->render_condition(pipe, predicate->query, mode);
1385				}
1386				else
1387				{
1388					/* TODO: add inverted predication to Gallium*/
1389					pipe->render_condition(pipe, 0, 0);
1390				}
1391			}
1392		}
1393	}
1394
1395	virtual void STDMETHODCALLTYPE SetPredication(
1396		ID3D11Predicate *new_predicate,
1397		BOOL new_predicate_value)
1398	{
1399		SYNCHRONIZED;
1400		if(render_predicate.p != new_predicate || render_predicate_value != new_predicate_value)
1401		{
1402			render_predicate = new_predicate;
1403			render_predicate_value = new_predicate_value;
1404			set_render_condition();
1405		}
1406	}
1407
1408	virtual void STDMETHODCALLTYPE GetPredication(
1409		ID3D11Predicate **out_predicate,
1410		BOOL *out_predicate_value)
1411	{
1412		SYNCHRONIZED;
1413		if(out_predicate)
1414			*out_predicate = render_predicate.ref();
1415		if(out_predicate_value)
1416			*out_predicate_value = render_predicate_value;
1417	}
1418
1419	static unsigned d3d11_subresource_to_level(struct pipe_resource* resource, unsigned subresource)
1420	{
1421		if(subresource <= resource->last_level)
1422		{
1423			return subresource;
1424		}
1425		else
1426		{
1427			unsigned levels = resource->last_level + 1;
1428			return subresource % levels;
1429		}
1430	}
1431
1432	static unsigned d3d11_subresource_to_face(struct pipe_resource* resource, unsigned subresource)
1433	{
1434		if(subresource <= resource->last_level)
1435		{
1436			return 0;
1437		}
1438		else
1439		{
1440			unsigned levels = resource->last_level + 1;
1441			return subresource / levels;
1442		}
1443	}
1444
1445
1446	/* TODO: deferred contexts will need a different implementation of this,
1447	 * because we can't put the transfer info into the resource itself.
1448	 * Also, there are very different restrictions, for obvious reasons.
1449	 */
1450	virtual HRESULT STDMETHODCALLTYPE Map(
1451		ID3D11Resource *iresource,
1452		unsigned subresource,
1453		D3D11_MAP map_type,
1454		unsigned map_flags,
1455		D3D11_MAPPED_SUBRESOURCE *mapped_resource)
1456	{
1457		SYNCHRONIZED;
1458		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1459		if(resource->transfers.count(subresource))
1460			return E_FAIL;
1461		unsigned level = d3d11_subresource_to_level(resource->resource, subresource);
1462		unsigned face = d3d11_subresource_to_face(resource->resource, subresource);
1463		pipe_box box = d3d11_to_pipe_box(resource->resource, level, 0);
1464		/* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */
1465		unsigned usage = 0;
1466		if(map_type == D3D11_MAP_READ)
1467			usage = PIPE_TRANSFER_READ;
1468		else if(map_type == D3D11_MAP_WRITE)
1469			usage = PIPE_TRANSFER_WRITE;
1470		else if(map_type == D3D11_MAP_READ_WRITE)
1471			usage = PIPE_TRANSFER_READ_WRITE;
1472		else if(map_type == D3D11_MAP_WRITE_DISCARD)
1473			usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD;
1474		else if(map_type == D3D11_MAP_WRITE_NO_OVERWRITE)
1475			usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_NOOVERWRITE;
1476		else
1477			return E_INVALIDARG;
1478		if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT)
1479			usage |= PIPE_TRANSFER_DONTBLOCK;
1480		struct pipe_transfer* transfer = pipe->get_transfer(pipe, resource->resource, level, usage, &box);
1481		if(!transfer) {
1482			if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT)
1483				return DXGI_ERROR_WAS_STILL_DRAWING;
1484			else
1485				return E_FAIL;
1486		}
1487		resource->transfers[subresource] = transfer;
1488		mapped_resource->pData = pipe->transfer_map(pipe, transfer);
1489		mapped_resource->RowPitch = transfer->stride;
1490		mapped_resource->DepthPitch = transfer->layer_stride;
1491		return S_OK;
1492	}
1493
1494	virtual void STDMETHODCALLTYPE Unmap(
1495		ID3D11Resource *iresource,
1496		unsigned subresource)
1497	{
1498		SYNCHRONIZED;
1499		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1500		std::unordered_map<unsigned, pipe_transfer*>::iterator i = resource->transfers.find(subresource);
1501		if(i != resource->transfers.end())
1502		{
1503			pipe->transfer_unmap(pipe, i->second);
1504			pipe->transfer_destroy(pipe, i->second);
1505			resource->transfers.erase(i);
1506		}
1507	}
1508
1509	virtual void STDMETHODCALLTYPE CopySubresourceRegion(
1510		ID3D11Resource *dst_resource,
1511		unsigned dst_subresource,
1512		unsigned dst_x,
1513		unsigned dst_y,
1514		unsigned dst_z,
1515		ID3D11Resource *src_resource,
1516		unsigned src_subresource,
1517		const D3D11_BOX *src_box)
1518	{
1519		SYNCHRONIZED;
1520		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1521		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1522		unsigned dst_level = d3d11_subresource_to_level(dst->resource, dst_subresource);
1523		unsigned dst_face = d3d11_subresource_to_face(dst->resource, dst_subresource);
1524		unsigned src_level = d3d11_subresource_to_level(src->resource, src_subresource);
1525		unsigned src_face = d3d11_subresource_to_face(src->resource, src_subresource);
1526		/* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */
1527		pipe_box box = d3d11_to_pipe_box(src->resource, src_level, src_box);
1528		{
1529			pipe->resource_copy_region(pipe,
1530				dst->resource, dst_level, dst_x, dst_y, dst_z,
1531				src->resource, src_level, &box);
1532		}
1533	}
1534
1535	virtual void STDMETHODCALLTYPE CopyResource(
1536		ID3D11Resource *dst_resource,
1537		ID3D11Resource *src_resource)
1538	{
1539		SYNCHRONIZED;
1540		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1541		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1542		unsigned level;
1543		for(level = 0; level <= dst->resource->last_level; ++level)
1544		{
1545		        unsigned layers = 1;
1546			pipe_box box;
1547			if (dst->resource->target == PIPE_TEXTURE_CUBE)
1548				layers = 6;
1549			else if (dst->resource->target == PIPE_TEXTURE_3D)
1550				layers = u_minify(dst->resource->depth0, level);
1551			/* else layers = dst->resource->array_size; */
1552			box.x = box.y = box.z = 0;
1553			box.width = u_minify(dst->resource->width0, level);
1554			box.height = u_minify(dst->resource->height0, level);
1555			box.depth = layers;
1556			pipe->resource_copy_region(pipe,
1557						   dst->resource, level, 0, 0, 0,
1558						   src->resource, level, &box);
1559		}
1560	}
1561
1562	virtual void STDMETHODCALLTYPE UpdateSubresource(
1563		ID3D11Resource *dst_resource,
1564		unsigned dst_subresource,
1565		const D3D11_BOX *pDstBox,
1566		const void *pSrcData,
1567		unsigned src_row_pitch,
1568		unsigned src_depth_pitch)
1569	{
1570		SYNCHRONIZED;
1571		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1572		unsigned dst_level = d3d11_subresource_to_level(dst->resource, dst_subresource);
1573		/* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */
1574		pipe_box box = d3d11_to_pipe_box(dst->resource, dst_level, pDstBox);
1575		pipe->transfer_inline_write(pipe, dst->resource, dst_level, PIPE_TRANSFER_WRITE, &box, pSrcData, src_row_pitch, src_depth_pitch);
1576	}
1577
1578#if API >= 11
1579	virtual void STDMETHODCALLTYPE CopyStructureCount(
1580		ID3D11Buffer *dst_buffer,
1581		unsigned dst_aligned_byte_offset,
1582		ID3D11UnorderedAccessView *src_view)
1583	{
1584		SYNCHRONIZED;
1585	}
1586#endif
1587
1588	virtual void STDMETHODCALLTYPE ClearRenderTargetView(
1589		ID3D11RenderTargetView *render_target_view,
1590		const float color[4])
1591	{
1592		SYNCHRONIZED;
1593		GalliumD3D11RenderTargetView* view = ((GalliumD3D11RenderTargetView*)render_target_view);
1594		pipe->clear_render_target(pipe, view->object, color, 0, 0, view->object->width, view->object->height);
1595	}
1596
1597	virtual void STDMETHODCALLTYPE ClearDepthStencilView(
1598		ID3D11DepthStencilView  *depth_stencil_view,
1599		unsigned clear_flags,
1600		float depth,
1601		UINT8 stencil)
1602	{
1603		SYNCHRONIZED;
1604		GalliumD3D11DepthStencilView* view = ((GalliumD3D11DepthStencilView*)depth_stencil_view);
1605		unsigned flags = 0;
1606		if(clear_flags & D3D11_CLEAR_DEPTH)
1607			flags |= PIPE_CLEAR_DEPTH;
1608		if(clear_flags & D3D11_CLEAR_STENCIL)
1609			flags |= PIPE_CLEAR_STENCIL;
1610		pipe->clear_depth_stencil(pipe, view->object, flags, depth, stencil, 0, 0, view->object->width, view->object->height);
1611	}
1612
1613#if API >= 11
1614	virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewUint(
1615		ID3D11UnorderedAccessView *unordered_access_view,
1616		const unsigned values[4])
1617	{
1618		SYNCHRONIZED;
1619	}
1620
1621	virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewFloat(
1622			ID3D11UnorderedAccessView *unordered_access_view,
1623			const float values[4])
1624	{
1625		SYNCHRONIZED;
1626	}
1627#endif
1628
1629	void restore_gallium_state_blit_only()
1630	{
1631		pipe->bind_blend_state(pipe, blend_state.p ? blend_state.p->object : default_blend);
1632		pipe->bind_depth_stencil_alpha_state(pipe, depth_stencil_state.p ? depth_stencil_state.p->object : default_depth_stencil);
1633		pipe->bind_rasterizer_state(pipe, rasterizer_state.p ? rasterizer_state.p->object : default_rasterizer);
1634		pipe->bind_vertex_elements_state(pipe, input_layout.p ? input_layout.p->object : default_input_layout);
1635		pipe->bind_fs_state(pipe, shaders[D3D11_STAGE_PS].p ? shaders[D3D11_STAGE_PS].p->object : default_shaders[PIPE_SHADER_FRAGMENT]);
1636		pipe->bind_vs_state(pipe, shaders[D3D11_STAGE_VS].p ? shaders[D3D11_STAGE_VS].p->object : default_shaders[PIPE_SHADER_VERTEX]);
1637		if(caps.gs)
1638			pipe->bind_gs_state(pipe, shaders[D3D11_STAGE_GS].p ? shaders[D3D11_STAGE_GS].p->object : default_shaders[PIPE_SHADER_GEOMETRY]);
1639		set_framebuffer();
1640		set_viewport();
1641		set_clip();
1642		set_render_condition();
1643		// TODO: restore stream output
1644
1645		update_flags |= UPDATE_VERTEX_BUFFERS | (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_PS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_PS));
1646	}
1647
1648	virtual void STDMETHODCALLTYPE RestoreGalliumStateBlitOnly()
1649	{
1650		SYNCHRONIZED;
1651		restore_gallium_state_blit_only();
1652	}
1653
1654	virtual void STDMETHODCALLTYPE GenerateMips(
1655		ID3D11ShaderResourceView *shader_resource_view)
1656	{
1657		SYNCHRONIZED;
1658
1659		GalliumD3D11ShaderResourceView* view = (GalliumD3D11ShaderResourceView*)shader_resource_view;
1660		if(caps.gs)
1661			pipe->bind_gs_state(pipe, 0);
1662		if(caps.so)
1663			pipe->bind_stream_output_state(pipe, 0);
1664		if(pipe->render_condition)
1665			pipe->render_condition(pipe, 0, 0);
1666		util_gen_mipmap(gen_mipmap, view->object, 0, 0, view->object->texture->last_level, PIPE_TEX_FILTER_LINEAR);
1667		restore_gallium_state_blit_only();
1668	}
1669
1670	virtual void STDMETHODCALLTYPE RestoreGalliumState()
1671	{
1672		SYNCHRONIZED;
1673		restore_gallium_state_blit_only();
1674
1675		set_index_buffer();
1676		set_stencil_ref();
1677		pipe->set_blend_color(pipe, (struct pipe_blend_color*)blend_color);
1678		pipe->set_sample_mask(pipe, sample_mask);
1679
1680		for(unsigned s = 0; s < 3; ++s)
1681		{
1682			unsigned num = std::min(caps.constant_buffers[s], (unsigned)D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
1683			for(unsigned i = 0; i < num; ++i)
1684				pipe->set_constant_buffer(pipe, s, i, constant_buffers[s][i].p ? constant_buffers[s][i].p->resource : 0);
1685		}
1686
1687		if(caps.so)
1688			pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1689
1690		update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_VS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_VS));
1691		update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_GS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_GS));
1692
1693		set_scissor();
1694	}
1695
1696#if API >= 11
1697	/* TODO: hack SRVs or sampler states to handle this, or add to Gallium */
1698	virtual void STDMETHODCALLTYPE SetResourceMinLOD(
1699		ID3D11Resource *iresource,
1700		float min_lod)
1701	{
1702		SYNCHRONIZED;
1703		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1704		if(resource->min_lod != min_lod)
1705		{
1706			// TODO: actually do anything?
1707			resource->min_lod = min_lod;
1708		}
1709	}
1710
1711	virtual float STDMETHODCALLTYPE GetResourceMinLOD(
1712		ID3D11Resource *iresource)
1713	{
1714		SYNCHRONIZED;
1715		GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1716		return resource->min_lod;
1717	}
1718#endif
1719
1720	virtual void STDMETHODCALLTYPE ResolveSubresource(
1721		ID3D11Resource *dst_resource,
1722		unsigned dst_subresource,
1723		ID3D11Resource *src_resource,
1724		unsigned src_subresource,
1725		DXGI_FORMAT format)
1726	{
1727		SYNCHRONIZED;
1728		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1729		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1730		struct pipe_resolve_info info;
1731
1732		info.dst.res = dst->resource;
1733		info.src.res = src->resource;
1734		info.dst.level = 0;
1735		info.dst.layer = d3d11_subresource_to_face(dst->resource, dst_subresource);
1736		info.src.layer = d3d11_subresource_to_face(src->resource, src_subresource);
1737
1738		info.src.x0 = 0;
1739		info.src.x1 = info.src.res->width0;
1740		info.src.y0 = 0;
1741		info.src.y1 = info.src.res->height0;
1742		info.dst.x0 = 0;
1743		info.dst.x1 = info.dst.res->width0;
1744		info.dst.y0 = 0;
1745		info.dst.y1 = info.dst.res->height0;
1746
1747		info.mask = PIPE_MASK_RGBA | PIPE_MASK_ZS;
1748
1749		pipe->resource_resolve(pipe, &info);
1750	}
1751
1752#if API >= 11
1753	virtual void STDMETHODCALLTYPE ExecuteCommandList(
1754		ID3D11CommandList *command_list,
1755		BOOL restore_context_state)
1756	{
1757		SYNCHRONIZED;
1758	}
1759
1760	virtual HRESULT STDMETHODCALLTYPE FinishCommandList(
1761		BOOL restore_deferred_context_state,
1762		ID3D11CommandList **out_command_list)
1763	{
1764		SYNCHRONIZED;
1765		return E_NOTIMPL;
1766	}
1767#endif
1768
1769	virtual void STDMETHODCALLTYPE ClearState(void)
1770	{
1771		/* we don't take a lock here because we would deadlock otherwise
1772		 * TODO: this is probably incorrect, because ClearState should likely be atomic.
1773		 * However, I can't think of any correct usage that would be affected by this
1774		 * being non-atomic, and making this atomic is quite expensive and complicates
1775		 * the code
1776		 */
1777
1778		// we qualify all calls so that we avoid virtual dispatch and might get them inlined
1779		// TODO: make sure all this gets inlined, which might require more compiler flags
1780		// TODO: optimize this
1781#if API >= 11
1782		GalliumD3D11DeviceContext::PSSetShader(0, 0, 0);
1783		GalliumD3D11DeviceContext::GSSetShader(0, 0, 0);
1784		GalliumD3D11DeviceContext::VSSetShader(0, 0, 0);
1785		GalliumD3D11DeviceContext::HSSetShader(0, 0, 0);
1786		GalliumD3D11DeviceContext::DSSetShader(0, 0, 0);
1787		GalliumD3D11DeviceContext::CSSetShader(0, 0, 0);
1788#else
1789		GalliumD3D11DeviceContext::PSSetShader(0);
1790		GalliumD3D11DeviceContext::GSSetShader(0);
1791		GalliumD3D11DeviceContext::VSSetShader(0);
1792#endif
1793
1794		GalliumD3D11DeviceContext::IASetInputLayout(0);
1795		GalliumD3D11DeviceContext::IASetIndexBuffer(0, DXGI_FORMAT_UNKNOWN, 0);
1796		GalliumD3D11DeviceContext::RSSetState(0);
1797		GalliumD3D11DeviceContext::OMSetDepthStencilState(0, 0);
1798		GalliumD3D11DeviceContext::OMSetBlendState(0, (float*)zero_data, ~0);
1799		GalliumD3D11DeviceContext::SetPredication(0, 0);
1800		GalliumD3D11DeviceContext::IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_UNDEFINED);
1801
1802		GalliumD3D11DeviceContext::PSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1803		GalliumD3D11DeviceContext::GSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1804		GalliumD3D11DeviceContext::VSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1805#if API >= 11
1806		GalliumD3D11DeviceContext::HSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1807		GalliumD3D11DeviceContext::DSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1808		GalliumD3D11DeviceContext::CSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1809#endif
1810
1811		GalliumD3D11DeviceContext::IASetVertexBuffers(0, num_vertex_buffers, (ID3D11Buffer**)zero_data, (unsigned*)zero_data, (unsigned*)zero_data);
1812#if API >= 11
1813		GalliumD3D11DeviceContext::OMSetRenderTargetsAndUnorderedAccessViews(0, 0, 0 , 0, 0, 0, 0);
1814#else
1815		GalliumD3D11DeviceContext::OMSetRenderTargets(0, 0, 0 );
1816#endif
1817		GalliumD3D11DeviceContext::SOSetTargets(0, 0, 0);
1818
1819		GalliumD3D11DeviceContext::PSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11ShaderResourceView**)zero_data);
1820		GalliumD3D11DeviceContext::GSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11ShaderResourceView**)zero_data);
1821		GalliumD3D11DeviceContext::VSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11ShaderResourceView**)zero_data);
1822#if API >= 11
1823		GalliumD3D11DeviceContext::HSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11ShaderResourceView**)zero_data);
1824		GalliumD3D11DeviceContext::DSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11ShaderResourceView**)zero_data);
1825		GalliumD3D11DeviceContext::CSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11ShaderResourceView**)zero_data);
1826#endif
1827
1828		GalliumD3D11DeviceContext::PSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11SamplerState**)zero_data);
1829		GalliumD3D11DeviceContext::GSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11SamplerState**)zero_data);
1830		GalliumD3D11DeviceContext::VSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11SamplerState**)zero_data);
1831#if API >= 11
1832		GalliumD3D11DeviceContext::HSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11SamplerState**)zero_data);
1833		GalliumD3D11DeviceContext::DSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11SamplerState**)zero_data);
1834		GalliumD3D11DeviceContext::CSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11SamplerState**)zero_data);
1835#endif
1836
1837		GalliumD3D11DeviceContext::RSSetViewports(0, 0);
1838		GalliumD3D11DeviceContext::RSSetScissorRects(0, 0);
1839	}
1840
1841	virtual void STDMETHODCALLTYPE Flush(void)
1842	{
1843		SYNCHRONIZED;
1844                pipe->flush(pipe, 0);
1845	}
1846
1847	/* In Direct3D 10, if the reference count of an object drops to 0, it is automatically
1848	 * cleanly unbound from the pipeline.
1849	 * In Direct3D 11, the pipeline holds a reference.
1850	 *
1851	 * Note that instead of always scanning the pipeline on destruction, we could
1852	 * maintain the internal reference count on DirectX 10 and use it to check if an
1853	 * object is still bound.
1854	 * Presumably, on average, scanning is faster if the application is well written.
1855	 */
1856#if API < 11
1857#define IMPLEMENT_SIMPLE_UNBIND(name, member, gallium, def) \
1858	void Unbind##name(ID3D11##name* state) \
1859	{ \
1860		SYNCHRONIZED; \
1861		if((void*)state == (void*)member.p) \
1862		{ \
1863			member.p = 0; \
1864			pipe->bind_##gallium##_state(pipe, default_##def); \
1865		} \
1866	}
1867	IMPLEMENT_SIMPLE_UNBIND(BlendState, blend_state, blend, blend)
1868	IMPLEMENT_SIMPLE_UNBIND(RasterizerState, rasterizer_state, rasterizer, rasterizer)
1869	IMPLEMENT_SIMPLE_UNBIND(DepthStencilState, depth_stencil_state, depth_stencil_alpha, depth_stencil)
1870	IMPLEMENT_SIMPLE_UNBIND(InputLayout, input_layout, vertex_elements, input_layout)
1871	IMPLEMENT_SIMPLE_UNBIND(PixelShader, shaders[D3D11_STAGE_PS], fs, shaders[D3D11_STAGE_PS])
1872	IMPLEMENT_SIMPLE_UNBIND(VertexShader, shaders[D3D11_STAGE_VS], vs, shaders[D3D11_STAGE_VS])
1873	IMPLEMENT_SIMPLE_UNBIND(GeometryShader, shaders[D3D11_STAGE_GS], gs, shaders[D3D11_STAGE_GS])
1874
1875	void UnbindPredicate(ID3D11Predicate* predicate)
1876	{
1877		SYNCHRONIZED;
1878		if(predicate == render_predicate)
1879		{
1880			render_predicate.p = NULL;
1881			render_predicate_value = 0;
1882			pipe->render_condition(pipe, 0, 0);
1883		}
1884	}
1885
1886	void UnbindSamplerState(ID3D11SamplerState* state)
1887	{
1888		SYNCHRONIZED;
1889		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1890		{
1891			for(unsigned i = 0; i < num_samplers[s]; ++i)
1892			{
1893				if(samplers[s][i] == state)
1894				{
1895					samplers[s][i].p = NULL;
1896					sampler_csos[s].v[i] = NULL;
1897					update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s));
1898				}
1899			}
1900		}
1901	}
1902
1903	void UnbindBuffer(ID3D11Buffer* buffer)
1904	{
1905		SYNCHRONIZED;
1906		if(buffer == index_buffer)
1907		{
1908			index_buffer.p = 0;
1909			index_format = DXGI_FORMAT_UNKNOWN;
1910			index_offset = 0;
1911			struct pipe_index_buffer ib;
1912			memset(&ib, 0, sizeof(ib));
1913			pipe->set_index_buffer(pipe, &ib);
1914		}
1915
1916		for(unsigned i = 0; i < num_vertex_buffers; ++i)
1917		{
1918			if(buffer == input_buffers[i])
1919			{
1920				input_buffers[i].p = 0;
1921				memset(&vertex_buffers[num_vertex_buffers], 0, sizeof(vertex_buffers[num_vertex_buffers]));
1922				update_flags |= UPDATE_VERTEX_BUFFERS;
1923			}
1924		}
1925
1926		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1927		{
1928			for(unsigned i = 0; i < sizeof(constant_buffers) / sizeof(constant_buffers[0]); ++i)
1929			{
1930				if(constant_buffers[s][i] == buffer)
1931				{
1932					constant_buffers[s][i] = (ID3D10Buffer*)NULL;
1933					pipe->set_constant_buffer(pipe, s, i, NULL);
1934				}
1935			}
1936		}
1937	}
1938
1939	void UnbindDepthStencilView(ID3D11DepthStencilView * view)
1940	{
1941		SYNCHRONIZED;
1942		if(view == depth_stencil_view)
1943		{
1944			depth_stencil_view.p = NULL;
1945			set_framebuffer();
1946		}
1947	}
1948
1949	void UnbindRenderTargetView(ID3D11RenderTargetView* view)
1950	{
1951		SYNCHRONIZED;
1952		bool any_bound = false;
1953		for(unsigned i = 0; i < num_render_target_views; ++i)
1954		{
1955			if(render_target_views[i] == view)
1956			{
1957				render_target_views[i].p = NULL;
1958				any_bound = true;
1959			}
1960		}
1961		if(any_bound)
1962			set_framebuffer();
1963	}
1964
1965	void UnbindShaderResourceView(ID3D11ShaderResourceView* view)
1966	{
1967		SYNCHRONIZED;
1968		for(unsigned s = 0; s < D3D11_STAGES; ++s)
1969		{
1970			for(unsigned i = 0; i < num_shader_resource_views[s]; ++i)
1971			{
1972				if(shader_resource_views[s][i] == view)
1973				{
1974					shader_resource_views[s][i].p = NULL;
1975					sampler_views[s][i] = NULL;
1976					update_flags |= (1 << (UPDATE_VIEWS_SHIFT + s));
1977				}
1978			}
1979		}
1980	}
1981#endif
1982
1983#undef SYNCHRONIZED
1984};
1985
1986#if API >= 11
1987/* This approach serves two purposes.
1988 * First, we don't want to do an atomic operation to manipulate the reference
1989 * count every time something is bound/unbound to the pipeline, since they are
1990 * expensive.
1991 * Fortunately, the immediate context can only be used by a single thread, so
1992 * we don't have to use them, as long as a separate reference count is used
1993 * (see dual_refcnt_t).
1994 *
1995 * Second, we want to avoid the Device -> DeviceContext -> bound DeviceChild -> Device
1996 * garbage cycle.
1997 * To avoid it, DeviceChild doesn't hold a reference to Device as usual, but adds
1998 * one for each external reference count, while internal nonatomic_add_ref doesn't
1999 * add any.
2000 *
 * Note that ideally we would like to eliminate the non-atomic op too, but this is more
 * complicated, since we would either need to use garbage collection and give up
 * deterministic destruction (especially bad for large textures), or scan the whole
 * pipeline state every time the reference count of an object drops to 0, which risks
 * pathological slowdowns.
2006 *
2007 * Since this microoptimization should matter relatively little, let's avoid it for now.
2008 *
 * Note that deferred contexts don't use this, since as a whole, they must be thread-safe.
2010 * Eliminating the atomic ops for deferred contexts seems substantially harder.
2011 * This might be a problem if they are used in a one-shot multithreaded rendering
2012 * fashion, where SMP cacheline bouncing on the reference count may be visible.
2013 *
2014 * The idea would be to attach a structure of reference counts indexed by deferred
2015 * context id to each object. Ideally, this should be organized like ext2 block pointers.
2016 *
2017 * Every deferred context would get a reference count in its own cacheline.
2018 * The external count is protected by a lock bit, and there is also a "lock bit" in each
2019 * internal count.
2020 *
2021 * When the external count has to be dropped to 0, the lock bit is taken and all internal
2022 * reference counts are scanned, taking a count of them. A flag would also be set on them.
2023 * Deferred context manipulation would notice the flag, and update the count.
2024 * Once the count goes to zero, the object is freed.
2025 *
 * The problem with this is that if the external reference count ping-pongs between
 * zero and non-zero, the scans will take a lot of time.
2028 *
2029 * The idea to solve this is to compute the scans in a binary-tree like fashion, where
2030 * each binary tree node would have a "determined bit", which would be invalidated
2031 * by manipulations.
2032 *
2033 * However, all this complexity might actually be a loss in most cases, so let's just
2034 * stick to a single atomic refcnt for now.
2035 *
2036 * Also, we don't even support deferred contexts yet, so this can wait.
2037 */
2038struct nonatomic_device_child_ptr_traits
2039{
2040	static void add_ref(void* p)
2041	{
2042		if(p)
2043			((GalliumD3D11DeviceChild<>*)p)->nonatomic_add_ref();
2044	}
2045
2046	static void release(void* p)
2047	{
2048		if(p)
2049			((GalliumD3D11DeviceChild<>*)p)->nonatomic_release();
2050	}
2051};
2052
2053struct GalliumD3D11ImmediateDeviceContext
2054	: public GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>
2055{
2056	GalliumD3D11ImmediateDeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, unsigned context_flags = 0)
2057	: GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>(device, pipe, context_flags)
2058	{
2059		// not necessary, but tests that the API at least basically works
2060		ClearState();
2061	}
2062
2063	/* we do this since otherwise we would have a garbage cycle between this and the device */
2064	virtual ULONG STDMETHODCALLTYPE AddRef()
2065	{
2066		return this->device->AddRef();
2067	}
2068
2069	virtual ULONG STDMETHODCALLTYPE Release()
2070	{
2071		return this->device->Release();
2072	}
2073
2074	virtual D3D11_DEVICE_CONTEXT_TYPE STDMETHODCALLTYPE GetType()
2075	{
2076		return D3D11_DEVICE_CONTEXT_IMMEDIATE;
2077	}
2078};
2079
2080static ID3D11DeviceContext* GalliumD3D11ImmediateDeviceContext_Create(GalliumD3D11Screen* device, struct pipe_context* pipe, bool owns_pipe)
2081{
2082	return new GalliumD3D11ImmediateDeviceContext(device, pipe, owns_pipe);
2083}
2084
2085static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumState(ID3D11DeviceContext* context)
2086{
2087	((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumState();
2088}
2089
2090static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumStateBlitOnly(ID3D11DeviceContext* context)
2091{
2092	((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumStateBlitOnly();
2093}
2094
2095static void GalliumD3D11ImmediateDeviceContext_Destroy(ID3D11DeviceContext* context)
2096{
2097	delete (GalliumD3D11ImmediateDeviceContext*)context;
2098}
2099#endif
2100