si_state.c revision eca57f85ee1f47b32daa641a19d8d386c58eb1de
1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *      Christian König <christian.koenig@amd.com>
25 */
26
27#include "si_pipe.h"
28#include "sid.h"
29#include "radeon/r600_cs.h"
30#include "radeon/r600_query.h"
31
32#include "util/u_dual_blend.h"
33#include "util/u_format.h"
34#include "util/u_format_s3tc.h"
35#include "util/u_memory.h"
36#include "util/u_resource.h"
37
38/* Initialize an external atom (owned by ../radeon). */
static void
si_init_external_atom(struct si_context *sctx, struct r600_atom *atom,
		      struct r600_atom **list_elem)
{
	/* The ID is the 1-based index of the slot within sctx->atoms.array
	 * (+1 presumably so that ID 0 can mean "no atom" — same scheme as
	 * si_init_atom below). */
	atom->id = list_elem - sctx->atoms.array + 1;
	/* Register the atom in its slot of the context's atom list. */
	*list_elem = atom;
}
46
47/* Initialize an atom owned by radeonsi.  */
48void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
49		  struct r600_atom **list_elem,
50		  void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
51{
52	atom->emit = (void*)emit_func;
53	atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */
54	*list_elem = atom;
55}
56
57static unsigned si_map_swizzle(unsigned swizzle)
58{
59	switch (swizzle) {
60	case PIPE_SWIZZLE_Y:
61		return V_008F0C_SQ_SEL_Y;
62	case PIPE_SWIZZLE_Z:
63		return V_008F0C_SQ_SEL_Z;
64	case PIPE_SWIZZLE_W:
65		return V_008F0C_SQ_SEL_W;
66	case PIPE_SWIZZLE_0:
67		return V_008F0C_SQ_SEL_0;
68	case PIPE_SWIZZLE_1:
69		return V_008F0C_SQ_SEL_1;
70	default: /* PIPE_SWIZZLE_X */
71		return V_008F0C_SQ_SEL_X;
72	}
73}
74
75static uint32_t S_FIXED(float value, uint32_t frac_bits)
76{
77	return value * (1 << frac_bits);
78}
79
80/* 12.4 fixed-point */
static unsigned si_pack_float_12p4(float x)
{
	/* Clamp to the representable 12.4 range [0, 0xffff], then scale by
	 * 16 (the truncating float->unsigned conversion drops sub-1/16
	 * precision, same as the original nested ternary). */
	if (x <= 0)
		return 0;
	if (x >= 4096)
		return 0xffff;
	return x * 16;
}
86
87/*
88 * Inferred framebuffer and blender state.
89 *
90 * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending
91 * if there is not enough PS outputs.
92 */
static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct si_state_blend *blend = sctx->queued.named.blend;
	/* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers,
	 * but you never know. */
	uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit;
	unsigned i;

	/* Each MRT owns 4 bits in cb_target_mask; intersect the bound
	 * colorbuffers with the blend state's per-RT colormask. */
	if (blend)
		cb_target_mask &= blend->cb_target_mask;

	/* Avoid a hang that happens when dual source blending is enabled
	 * but there is not enough color outputs. This is undefined behavior,
	 * so disable color writes completely.
	 *
	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
	 */
	if (blend && blend->dual_src_blend &&
	    sctx->ps_shader.cso &&
	    (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
		cb_target_mask = 0;

	radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask);

	/* STONEY-specific register settings (RB+ down-conversion and blend
	 * optimizations, judging by the DBG_NO_RB_PLUS override below). */
	if (sctx->b.family == CHIP_STONEY) {
		/* Per-MRT export format chosen by the PS epilog; 4 bits per MRT. */
		unsigned spi_shader_col_format =
			sctx->ps_shader.cso ?
			sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0;
		unsigned sx_ps_downconvert = 0;
		unsigned sx_blend_opt_epsilon = 0;
		unsigned sx_blend_opt_control = 0;

		for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
			struct r600_surface *surf =
				(struct r600_surface*)sctx->framebuffer.state.cbufs[i];
			unsigned format, swap, spi_format, colormask;
			bool has_alpha, has_rgb;

			if (!surf)
				continue;

			format = G_028C70_FORMAT(surf->cb_color_info);
			swap = G_028C70_COMP_SWAP(surf->cb_color_info);
			spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
			colormask = (cb_target_mask >> (i * 4)) & 0xf;

			/* Set if RGB and A are present. */
			has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib);

			/* Single-channel formats store either RGB or A,
			 * never both. */
			if (format == V_028C70_COLOR_8 ||
			    format == V_028C70_COLOR_16 ||
			    format == V_028C70_COLOR_32)
				has_rgb = !has_alpha;
			else
				has_rgb = true;

			/* Check the colormask and export format. */
			if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A)))
				has_rgb = false;
			if (!(colormask & PIPE_MASK_A))
				has_alpha = false;

			if (spi_format == V_028714_SPI_SHADER_ZERO) {
				has_rgb = false;
				has_alpha = false;
			}

			/* Disable value checking for disabled channels. */
			if (!has_rgb)
				sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
			if (!has_alpha)
				sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);

			/* Enable down-conversion for 32bpp and smaller formats. */
			switch (format) {
			case V_028C70_COLOR_8:
			case V_028C70_COLOR_8_8:
			case V_028C70_COLOR_8_8_8_8:
				/* For 1 and 2-channel formats, use the superset thereof. */
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_5_6_5:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_1_5_5_5:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_4_4_4_4:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_32:
				/* 32-bit single channel: which channel depends
				 * on the component swap. */
				if (swap == V_0280A0_SWAP_STD &&
				    spi_format == V_028714_SPI_SHADER_32_R)
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
				else if (swap == V_0280A0_SWAP_ALT_REV &&
					 spi_format == V_028714_SPI_SHADER_32_AR)
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
				break;

			case V_028C70_COLOR_16:
			case V_028C70_COLOR_16_16:
				/* For 1-channel formats, use the superset thereof. */
				if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
					if (swap == V_0280A0_SWAP_STD ||
					    swap == V_0280A0_SWAP_STD_REV)
						sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
					else
						sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
				}
				break;

			case V_028C70_COLOR_10_11_11:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_2_10_10_10:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
				}
				break;
			}
		}

		/* Debug switch: force all RB+ optimizations off. */
		if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) {
			sx_ps_downconvert = 0;
			sx_blend_opt_epsilon = 0;
			sx_blend_opt_control = 0;
		}

		/* The three registers are consecutive; emit them as one packet. */
		radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
		radeon_emit(cs, sx_ps_downconvert);	/* R_028754_SX_PS_DOWNCONVERT */
		radeon_emit(cs, sx_blend_opt_epsilon);	/* R_028758_SX_BLEND_OPT_EPSILON */
		radeon_emit(cs, sx_blend_opt_control);	/* R_02875C_SX_BLEND_OPT_CONTROL */
	}
}
255
256/*
257 * Blender functions
258 */
259
260static uint32_t si_translate_blend_function(int blend_func)
261{
262	switch (blend_func) {
263	case PIPE_BLEND_ADD:
264		return V_028780_COMB_DST_PLUS_SRC;
265	case PIPE_BLEND_SUBTRACT:
266		return V_028780_COMB_SRC_MINUS_DST;
267	case PIPE_BLEND_REVERSE_SUBTRACT:
268		return V_028780_COMB_DST_MINUS_SRC;
269	case PIPE_BLEND_MIN:
270		return V_028780_COMB_MIN_DST_SRC;
271	case PIPE_BLEND_MAX:
272		return V_028780_COMB_MAX_DST_SRC;
273	default:
274		R600_ERR("Unknown blend function %d\n", blend_func);
275		assert(0);
276		break;
277	}
278	return 0;
279}
280
281static uint32_t si_translate_blend_factor(int blend_fact)
282{
283	switch (blend_fact) {
284	case PIPE_BLENDFACTOR_ONE:
285		return V_028780_BLEND_ONE;
286	case PIPE_BLENDFACTOR_SRC_COLOR:
287		return V_028780_BLEND_SRC_COLOR;
288	case PIPE_BLENDFACTOR_SRC_ALPHA:
289		return V_028780_BLEND_SRC_ALPHA;
290	case PIPE_BLENDFACTOR_DST_ALPHA:
291		return V_028780_BLEND_DST_ALPHA;
292	case PIPE_BLENDFACTOR_DST_COLOR:
293		return V_028780_BLEND_DST_COLOR;
294	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
295		return V_028780_BLEND_SRC_ALPHA_SATURATE;
296	case PIPE_BLENDFACTOR_CONST_COLOR:
297		return V_028780_BLEND_CONSTANT_COLOR;
298	case PIPE_BLENDFACTOR_CONST_ALPHA:
299		return V_028780_BLEND_CONSTANT_ALPHA;
300	case PIPE_BLENDFACTOR_ZERO:
301		return V_028780_BLEND_ZERO;
302	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
303		return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
304	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
305		return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
306	case PIPE_BLENDFACTOR_INV_DST_ALPHA:
307		return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
308	case PIPE_BLENDFACTOR_INV_DST_COLOR:
309		return V_028780_BLEND_ONE_MINUS_DST_COLOR;
310	case PIPE_BLENDFACTOR_INV_CONST_COLOR:
311		return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
312	case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
313		return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
314	case PIPE_BLENDFACTOR_SRC1_COLOR:
315		return V_028780_BLEND_SRC1_COLOR;
316	case PIPE_BLENDFACTOR_SRC1_ALPHA:
317		return V_028780_BLEND_SRC1_ALPHA;
318	case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
319		return V_028780_BLEND_INV_SRC1_COLOR;
320	case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
321		return V_028780_BLEND_INV_SRC1_ALPHA;
322	default:
323		R600_ERR("Bad blend factor %d not supported!\n", blend_fact);
324		assert(0);
325		break;
326	}
327	return 0;
328}
329
330static uint32_t si_translate_blend_opt_function(int blend_func)
331{
332	switch (blend_func) {
333	case PIPE_BLEND_ADD:
334		return V_028760_OPT_COMB_ADD;
335	case PIPE_BLEND_SUBTRACT:
336		return V_028760_OPT_COMB_SUBTRACT;
337	case PIPE_BLEND_REVERSE_SUBTRACT:
338		return V_028760_OPT_COMB_REVSUBTRACT;
339	case PIPE_BLEND_MIN:
340		return V_028760_OPT_COMB_MIN;
341	case PIPE_BLEND_MAX:
342		return V_028760_OPT_COMB_MAX;
343	default:
344		return V_028760_OPT_COMB_BLEND_DISABLED;
345	}
346}
347
348static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
349{
350	switch (blend_fact) {
351	case PIPE_BLENDFACTOR_ZERO:
352		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
353	case PIPE_BLENDFACTOR_ONE:
354		return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
355	case PIPE_BLENDFACTOR_SRC_COLOR:
356		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
357				: V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
358	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
359		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
360				: V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
361	case PIPE_BLENDFACTOR_SRC_ALPHA:
362		return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
363	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
364		return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
365	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
366		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
367				: V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
368	default:
369		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
370	}
371}
372
373/**
374 * Get rid of DST in the blend factors by commuting the operands:
375 *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
376 */
377static void si_blend_remove_dst(unsigned *func, unsigned *src_factor,
378				unsigned *dst_factor, unsigned expected_dst,
379				unsigned replacement_src)
380{
381	if (*src_factor == expected_dst &&
382	    *dst_factor == PIPE_BLENDFACTOR_ZERO) {
383		*src_factor = PIPE_BLENDFACTOR_ZERO;
384		*dst_factor = replacement_src;
385
386		/* Commuting the operands requires reversing subtractions. */
387		if (*func == PIPE_BLEND_SUBTRACT)
388			*func = PIPE_BLEND_REVERSE_SUBTRACT;
389		else if (*func == PIPE_BLEND_REVERSE_SUBTRACT)
390			*func = PIPE_BLEND_SUBTRACT;
391	}
392}
393
394static bool si_blend_factor_uses_dst(unsigned factor)
395{
396	return factor == PIPE_BLENDFACTOR_DST_COLOR ||
397		factor == PIPE_BLENDFACTOR_DST_ALPHA ||
398		factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
399		factor == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
400		factor == PIPE_BLENDFACTOR_INV_DST_COLOR;
401}
402
403static void *si_create_blend_state_mode(struct pipe_context *ctx,
404					const struct pipe_blend_state *state,
405					unsigned mode)
406{
407	struct si_context *sctx = (struct si_context*)ctx;
408	struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
409	struct si_pm4_state *pm4 = &blend->pm4;
410	uint32_t sx_mrt_blend_opt[8] = {0};
411	uint32_t color_control = 0;
412
413	if (!blend)
414		return NULL;
415
416	blend->alpha_to_coverage = state->alpha_to_coverage;
417	blend->alpha_to_one = state->alpha_to_one;
418	blend->dual_src_blend = util_blend_state_is_dual(state, 0);
419
420	if (state->logicop_enable) {
421		color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
422	} else {
423		color_control |= S_028808_ROP3(0xcc);
424	}
425
426	si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
427		       S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
428		       S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
429		       S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
430		       S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
431		       S_028B70_ALPHA_TO_MASK_OFFSET3(2));
432
433	if (state->alpha_to_coverage)
434		blend->need_src_alpha_4bit |= 0xf;
435
436	blend->cb_target_mask = 0;
437	for (int i = 0; i < 8; i++) {
438		/* state->rt entries > 0 only written if independent blending */
439		const int j = state->independent_blend_enable ? i : 0;
440
441		unsigned eqRGB = state->rt[j].rgb_func;
442		unsigned srcRGB = state->rt[j].rgb_src_factor;
443		unsigned dstRGB = state->rt[j].rgb_dst_factor;
444		unsigned eqA = state->rt[j].alpha_func;
445		unsigned srcA = state->rt[j].alpha_src_factor;
446		unsigned dstA = state->rt[j].alpha_dst_factor;
447
448		unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
449		unsigned blend_cntl = 0;
450
451		sx_mrt_blend_opt[i] =
452			S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
453			S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
454
455		/* Only set dual source blending for MRT0 to avoid a hang. */
456		if (i >= 1 && blend->dual_src_blend) {
457			/* Vulkan does this for dual source blending. */
458			if (i == 1)
459				blend_cntl |= S_028780_ENABLE(1);
460
461			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
462			continue;
463		}
464
465		/* Only addition and subtraction equations are supported with
466		 * dual source blending.
467		 */
468		if (blend->dual_src_blend &&
469		    (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||
470		     eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {
471			assert(!"Unsupported equation for dual source blending");
472			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
473			continue;
474		}
475
476		/* cb_render_state will disable unused ones */
477		blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);
478
479		if (!state->rt[j].colormask || !state->rt[j].blend_enable) {
480			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
481			continue;
482		}
483
484		/* Blending optimizations for Stoney.
485		 * These transformations don't change the behavior.
486		 *
487		 * First, get rid of DST in the blend factors:
488		 *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
489		 */
490		si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB,
491				    PIPE_BLENDFACTOR_DST_COLOR,
492				    PIPE_BLENDFACTOR_SRC_COLOR);
493		si_blend_remove_dst(&eqA, &srcA, &dstA,
494				    PIPE_BLENDFACTOR_DST_COLOR,
495				    PIPE_BLENDFACTOR_SRC_COLOR);
496		si_blend_remove_dst(&eqA, &srcA, &dstA,
497				    PIPE_BLENDFACTOR_DST_ALPHA,
498				    PIPE_BLENDFACTOR_SRC_ALPHA);
499
500		/* Look up the ideal settings from tables. */
501		srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
502		dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
503		srcA_opt = si_translate_blend_opt_factor(srcA, true);
504		dstA_opt = si_translate_blend_opt_factor(dstA, true);
505
506		/* Handle interdependencies. */
507		if (si_blend_factor_uses_dst(srcRGB))
508			dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
509		if (si_blend_factor_uses_dst(srcA))
510			dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
511
512		if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
513		    (dstRGB == PIPE_BLENDFACTOR_ZERO ||
514		     dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
515		     dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE))
516			dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
517
518		/* Set the final value. */
519		sx_mrt_blend_opt[i] =
520			S_028760_COLOR_SRC_OPT(srcRGB_opt) |
521			S_028760_COLOR_DST_OPT(dstRGB_opt) |
522			S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
523			S_028760_ALPHA_SRC_OPT(srcA_opt) |
524			S_028760_ALPHA_DST_OPT(dstA_opt) |
525			S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
526
527		/* Set blend state. */
528		blend_cntl |= S_028780_ENABLE(1);
529		blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
530		blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
531		blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
532
533		if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
534			blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
535			blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
536			blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
537			blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
538		}
539		si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
540
541		blend->blend_enable_4bit |= 0xfu << (i * 4);
542
543		/* This is only important for formats without alpha. */
544		if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
545		    dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
546		    srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
547		    dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
548		    srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
549		    dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
550			blend->need_src_alpha_4bit |= 0xfu << (i * 4);
551	}
552
553	if (blend->cb_target_mask) {
554		color_control |= S_028808_MODE(mode);
555	} else {
556		color_control |= S_028808_MODE(V_028808_CB_DISABLE);
557	}
558
559	if (sctx->b.family == CHIP_STONEY) {
560		/* Disable RB+ blend optimizations for dual source blending.
561		 * Vulkan does this.
562		 */
563		if (blend->dual_src_blend) {
564			for (int i = 0; i < 8; i++) {
565				sx_mrt_blend_opt[i] =
566					S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
567					S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
568			}
569		}
570
571		for (int i = 0; i < 8; i++)
572			si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
573				       sx_mrt_blend_opt[i]);
574
575		/* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */
576		if (blend->dual_src_blend || state->logicop_enable ||
577		    mode == V_028808_CB_RESOLVE)
578			color_control |= S_028808_DISABLE_DUAL_QUAD(1);
579	}
580
581	si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
582	return blend;
583}
584
/* pipe_context::create_blend_state hook: normal (non-resolve) CB mode. */
static void *si_create_blend_state(struct pipe_context *ctx,
				   const struct pipe_blend_state *state)
{
	return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
}
590
static void si_bind_blend_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
	/* CB_TARGET_MASK etc. are derived from the blend state; re-emit. */
	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
	/* Shader variants may depend on blend state (see the PS epilog use
	 * in si_emit_cb_render_state), so schedule a shader update. */
	sctx->do_update_shaders = true;
}
598
/* pipe_context::delete_blend_state hook: free the CSO (unbinding first
 * if currently bound, which is what si_pm4_delete_state handles). */
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
}
604
605static void si_set_blend_color(struct pipe_context *ctx,
606			       const struct pipe_blend_color *state)
607{
608	struct si_context *sctx = (struct si_context *)ctx;
609
610	if (memcmp(&sctx->blend_color.state, state, sizeof(*state)) == 0)
611		return;
612
613	sctx->blend_color.state = *state;
614	si_mark_atom_dirty(sctx, &sctx->blend_color.atom);
615}
616
static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;

	/* Write the 4 consecutive CB_BLEND_{RED,GREEN,BLUE,ALPHA} registers
	 * from the stored float color, reinterpreted as raw dwords. */
	radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
	radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
}
624
625/*
626 * Clipping
627 */
628
static void si_set_clip_state(struct pipe_context *ctx,
			      const struct pipe_clip_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_constant_buffer cb;

	/* Skip redundant updates. */
	if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0)
		return;

	sctx->clip_state.state = *state;
	si_mark_atom_dirty(sctx, &sctx->clip_state.atom);

	/* Also upload the clip planes (8 planes x vec4 x 4 bytes = 128 bytes)
	 * as a user constant buffer for shader access. */
	cb.buffer = NULL;
	cb.user_buffer = state->ucp;
	cb.buffer_offset = 0;
	cb.buffer_size = 4*4*8;
	si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb);
	/* NOTE(review): cb.buffer is still NULL here, so this unreference
	 * looks like a no-op unless si_set_rw_buffer stores a reference
	 * back into cb — verify against its implementation. */
	pipe_resource_reference(&cb.buffer, NULL);
}
648
static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;

	/* Write all 6 hardware user clip planes (6 x vec4 = 24 dwords)
	 * starting at PA_CL_UCP_0_X. */
	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
	radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
}
656
657#define SIX_BITS 0x3F
658
static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct si_shader *vs = si_get_vs_state(sctx);
	struct tgsi_shader_info *info = si_get_vs_info(sctx);
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
	unsigned window_space =
	   info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
	/* If the shader writes the legacy clip vertex, enable all 6 clip
	 * distances; otherwise use the shader's declared writemask. */
	unsigned clipdist_mask =
		info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask;
	/* Fixed-function user clip planes are only used when the shader
	 * doesn't write clip distances itself. */
	unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS;
	/* Cull distances follow the clip distances in the CCDIST vectors. */
	unsigned culldist_mask = info->culldist_writemask << info->num_written_clipdistance;
	unsigned total_mask;
	bool misc_vec_ena;

	/* The shader key can request clipping to be compiled out. */
	if (vs->key.opt.hw_vs.clip_disable) {
		assert(!info->culldist_writemask);
		clipdist_mask = 0;
		culldist_mask = 0;
	}
	total_mask = clipdist_mask | culldist_mask;

	/* Clip distances on points have no effect, so need to be implemented
	 * as cull distances. This applies for the clipvertex case as well.
	 *
	 * Setting this for primitives other than points should have no adverse
	 * effects.
	 */
	clipdist_mask &= rs->clip_plane_enable;
	culldist_mask |= clipdist_mask;

	/* The "misc" output vector carries point size / edge flag / layer /
	 * viewport index, whichever the shader writes. */
	misc_vec_ena = info->writes_psize || info->writes_edgeflag ||
		       info->writes_layer || info->writes_viewport_index;

	radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
		S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
		S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
		S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
		S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
		S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
		S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
		S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
		S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
		clipdist_mask | (culldist_mask << 8));
	radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
		rs->pa_cl_clip_cntl |
		ucp_mask |
		S_028810_CLIP_DISABLE(window_space));

	/* reuse needs to be set off if we write oViewport */
	radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
			       S_028AB4_REUSE_OFF(info->writes_viewport_index));
}
712
713/*
714 * inferred state between framebuffer and rasterizer
715 */
716static void si_update_poly_offset_state(struct si_context *sctx)
717{
718	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
719
720	if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
721		return;
722
723	/* Use the user format, not db_render_format, so that the polygon
724	 * offset behaves as expected by applications.
725	 */
726	switch (sctx->framebuffer.state.zsbuf->texture->format) {
727	case PIPE_FORMAT_Z16_UNORM:
728		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
729		break;
730	default: /* 24-bit */
731		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]);
732		break;
733	case PIPE_FORMAT_Z32_FLOAT:
734	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
735		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]);
736		break;
737	}
738}
739
740/*
741 * Rasterizer
742 */
743
744static uint32_t si_translate_fill(uint32_t func)
745{
746	switch(func) {
747	case PIPE_POLYGON_MODE_FILL:
748		return V_028814_X_DRAW_TRIANGLES;
749	case PIPE_POLYGON_MODE_LINE:
750		return V_028814_X_DRAW_LINES;
751	case PIPE_POLYGON_MODE_POINT:
752		return V_028814_X_DRAW_POINTS;
753	default:
754		assert(0);
755		return V_028814_X_DRAW_POINTS;
756	}
757}
758
759static void *si_create_rs_state(struct pipe_context *ctx,
760				const struct pipe_rasterizer_state *state)
761{
762	struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
763	struct si_pm4_state *pm4 = &rs->pm4;
764	unsigned tmp, i;
765	float psize_min, psize_max;
766
767	if (!rs) {
768		return NULL;
769	}
770
771	rs->scissor_enable = state->scissor;
772	rs->clip_halfz = state->clip_halfz;
773	rs->two_side = state->light_twoside;
774	rs->multisample_enable = state->multisample;
775	rs->force_persample_interp = state->force_persample_interp;
776	rs->clip_plane_enable = state->clip_plane_enable;
777	rs->line_stipple_enable = state->line_stipple_enable;
778	rs->poly_stipple_enable = state->poly_stipple_enable;
779	rs->line_smooth = state->line_smooth;
780	rs->poly_smooth = state->poly_smooth;
781	rs->uses_poly_offset = state->offset_point || state->offset_line ||
782			       state->offset_tri;
783	rs->clamp_fragment_color = state->clamp_fragment_color;
784	rs->flatshade = state->flatshade;
785	rs->sprite_coord_enable = state->sprite_coord_enable;
786	rs->rasterizer_discard = state->rasterizer_discard;
787	rs->pa_sc_line_stipple = state->line_stipple_enable ?
788				S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
789				S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
790	rs->pa_cl_clip_cntl =
791		S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
792		S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
793		S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
794		S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
795		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
796
797	si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
798		S_0286D4_FLAT_SHADE_ENA(1) |
799		S_0286D4_PNT_SPRITE_ENA(1) |
800		S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
801		S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
802		S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
803		S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
804		S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT));
805
806	/* point size 12.4 fixed point */
807	tmp = (unsigned)(state->point_size * 8.0);
808	si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
809
810	if (state->point_size_per_vertex) {
811		psize_min = util_get_min_point_size(state);
812		psize_max = 8192;
813	} else {
814		/* Force the point size to be as if the vertex output was disabled. */
815		psize_min = state->point_size;
816		psize_max = state->point_size;
817	}
818	/* Divide by two, because 0.5 = 1 pixel. */
819	si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
820			S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) |
821			S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2)));
822
823	tmp = (unsigned)state->line_width * 8;
824	si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp));
825	si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
826		       S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
827		       S_028A48_MSAA_ENABLE(state->multisample ||
828					    state->poly_smooth ||
829					    state->line_smooth) |
830		       S_028A48_VPORT_SCISSOR_ENABLE(1));
831
832	si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
833		       S_028BE4_PIX_CENTER(state->half_pixel_center) |
834		       S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
835
836	si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
837	si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL,
838		S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
839		S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
840		S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
841		S_028814_FACE(!state->front_ccw) |
842		S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
843		S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
844		S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
845		S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
846				   state->fill_back != PIPE_POLYGON_MODE_FILL) |
847		S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
848		S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));
849	si_pm4_set_reg(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0 +
850		       SI_SGPR_VS_STATE_BITS * 4, state->clamp_vertex_color);
851
852	/* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */
853	for (i = 0; i < 3; i++) {
854		struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i];
855		float offset_units = state->offset_units;
856		float offset_scale = state->offset_scale * 16.0f;
857		uint32_t pa_su_poly_offset_db_fmt_cntl = 0;
858
859		if (!state->offset_units_unscaled) {
860			switch (i) {
861			case 0: /* 16-bit zbuffer */
862				offset_units *= 4.0f;
863				pa_su_poly_offset_db_fmt_cntl =
864					S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
865				break;
866			case 1: /* 24-bit zbuffer */
867				offset_units *= 2.0f;
868				pa_su_poly_offset_db_fmt_cntl =
869					S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
870				break;
871			case 2: /* 32-bit zbuffer */
872				offset_units *= 1.0f;
873				pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
874								S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
875				break;
876			}
877		}
878
879		si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
880			       fui(offset_scale));
881		si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
882			       fui(offset_units));
883		si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
884			       fui(offset_scale));
885		si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
886			       fui(offset_units));
887		si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
888			       pa_su_poly_offset_db_fmt_cntl);
889	}
890
891	return rs;
892}
893
/* Bind a rasterizer CSO.
 *
 * Marks the dependent atoms dirty (DB render state, MSAA sample locations,
 * clip regs) and refreshes the derived polygon-offset state.
 */
static void si_bind_rs_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_state_rasterizer *old_rs =
		(struct si_state_rasterizer*)sctx->queued.named.rasterizer;
	struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;

	/* Binding NULL is a no-op; the previous state stays bound. */
	if (!state)
		return;

	/* DB render state depends on whether MSAA rasterization is enabled. */
	if (!old_rs || old_rs->multisample_enable != rs->multisample_enable) {
		si_mark_atom_dirty(sctx, &sctx->db_render_state);

		/* Update the small primitive filter workaround if necessary. */
		if (sctx->b.family >= CHIP_POLARIS10 &&
		    sctx->framebuffer.nr_samples > 1)
			si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
	}

	r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz);

	/* NOTE(review): keep this ordering — si_update_poly_offset_state
	 * presumably reads the newly bound rasterizer; confirm before
	 * reordering. */
	si_pm4_bind_state(sctx, rasterizer, rs);
	si_update_poly_offset_state(sctx);

	si_mark_atom_dirty(sctx, &sctx->clip_regs);
	sctx->do_update_shaders = true;
}
921
/* Destroy a rasterizer CSO.  If it is the currently queued rasterizer,
 * unbind the poly_offset state first: the poly-offset PM4 states are
 * embedded in the rasterizer object (pm4_poly_offset), so they must not
 * stay bound after the object is freed. */
static void si_delete_rs_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;

	if (sctx->queued.named.rasterizer == state)
		si_pm4_bind_state(sctx, poly_offset, NULL);
	si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
}
930
931/*
932 * infeered state between dsa and stencil ref
933 */
/* Emit DB_STENCILREFMASK and DB_STENCILREFMASK_BF, combining the stencil
 * reference values (from set_stencil_ref) with the value/write masks that
 * come from the bound DSA state.  Index 0 = front faces, 1 = back faces. */
static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
	struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;

	/* Two consecutive registers: DB_STENCILREFMASK, DB_STENCILREFMASK_BF. */
	radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
	radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
			S_028430_STENCILMASK(dsa->valuemask[0]) |
			S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
			S_028430_STENCILOPVAL(1));
	radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
			S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
			S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
			S_028434_STENCILOPVAL_BF(1));
}
950
951static void si_set_stencil_ref(struct pipe_context *ctx,
952			       const struct pipe_stencil_ref *state)
953{
954        struct si_context *sctx = (struct si_context *)ctx;
955
956	if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0)
957		return;
958
959	sctx->stencil_ref.state = *state;
960	si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
961}
962
963
964/*
965 * DSA
966 */
967
968static uint32_t si_translate_stencil_op(int s_op)
969{
970	switch (s_op) {
971	case PIPE_STENCIL_OP_KEEP:
972		return V_02842C_STENCIL_KEEP;
973	case PIPE_STENCIL_OP_ZERO:
974		return V_02842C_STENCIL_ZERO;
975	case PIPE_STENCIL_OP_REPLACE:
976		return V_02842C_STENCIL_REPLACE_TEST;
977	case PIPE_STENCIL_OP_INCR:
978		return V_02842C_STENCIL_ADD_CLAMP;
979	case PIPE_STENCIL_OP_DECR:
980		return V_02842C_STENCIL_SUB_CLAMP;
981	case PIPE_STENCIL_OP_INCR_WRAP:
982		return V_02842C_STENCIL_ADD_WRAP;
983	case PIPE_STENCIL_OP_DECR_WRAP:
984		return V_02842C_STENCIL_SUB_WRAP;
985	case PIPE_STENCIL_OP_INVERT:
986		return V_02842C_STENCIL_INVERT;
987	default:
988		R600_ERR("Unknown stencil op %d", s_op);
989		assert(0);
990		break;
991	}
992	return 0;
993}
994
995static void *si_create_dsa_state(struct pipe_context *ctx,
996				 const struct pipe_depth_stencil_alpha_state *state)
997{
998	struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
999	struct si_pm4_state *pm4 = &dsa->pm4;
1000	unsigned db_depth_control;
1001	uint32_t db_stencil_control = 0;
1002
1003	if (!dsa) {
1004		return NULL;
1005	}
1006
1007	dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask;
1008	dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask;
1009	dsa->stencil_ref.writemask[0] = state->stencil[0].writemask;
1010	dsa->stencil_ref.writemask[1] = state->stencil[1].writemask;
1011
1012	db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
1013		S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
1014		S_028800_ZFUNC(state->depth.func) |
1015		S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test);
1016
1017	/* stencil */
1018	if (state->stencil[0].enabled) {
1019		db_depth_control |= S_028800_STENCIL_ENABLE(1);
1020		db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
1021		db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
1022		db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
1023		db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
1024
1025		if (state->stencil[1].enabled) {
1026			db_depth_control |= S_028800_BACKFACE_ENABLE(1);
1027			db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
1028			db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
1029			db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
1030			db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
1031		}
1032	}
1033
1034	/* alpha */
1035	if (state->alpha.enabled) {
1036		dsa->alpha_func = state->alpha.func;
1037
1038		si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
1039		               SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value));
1040	} else {
1041		dsa->alpha_func = PIPE_FUNC_ALWAYS;
1042	}
1043
1044	si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
1045	si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
1046	if (state->depth.bounds_test) {
1047		si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min));
1048		si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max));
1049	}
1050
1051	return dsa;
1052}
1053
1054static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
1055{
1056        struct si_context *sctx = (struct si_context *)ctx;
1057        struct si_state_dsa *dsa = state;
1058
1059        if (!state)
1060                return;
1061
1062	si_pm4_bind_state(sctx, dsa, dsa);
1063
1064	if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
1065		   sizeof(struct si_dsa_stencil_ref_part)) != 0) {
1066		sctx->stencil_ref.dsa_part = dsa->stencil_ref;
1067		si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
1068	}
1069	sctx->do_update_shaders = true;
1070}
1071
1072static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
1073{
1074	struct si_context *sctx = (struct si_context *)ctx;
1075	si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
1076}
1077
1078static void *si_create_db_flush_dsa(struct si_context *sctx)
1079{
1080	struct pipe_depth_stencil_alpha_state dsa = {};
1081
1082	return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
1083}
1084
1085/* DB RENDER STATE */
1086
1087static void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
1088{
1089	struct si_context *sctx = (struct si_context*)ctx;
1090
1091	/* Pipeline stat & streamout queries. */
1092	if (enable) {
1093		sctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS;
1094		sctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS;
1095	} else {
1096		sctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS;
1097		sctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS;
1098	}
1099
1100	/* Occlusion queries. */
1101	if (sctx->occlusion_queries_disabled != !enable) {
1102		sctx->occlusion_queries_disabled = !enable;
1103		si_mark_atom_dirty(sctx, &sctx->db_render_state);
1104	}
1105}
1106
/* Called when the set of active occlusion queries changes.
 *
 * 'enable' is intentionally unused here: si_emit_db_render_state derives
 * the DB_COUNT_CONTROL value from b.num_occlusion_queries and
 * occlusion_queries_disabled when the atom is emitted. */
static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
{
	struct si_context *sctx = (struct si_context*)ctx;

	si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
1113
/* Save the compute state clobbered by the query-buffer-object helper:
 * the bound compute shader, compute const buffer 0, and the first three
 * compute shader buffers.  The caller is presumably responsible for
 * restoring them from *st afterwards. */
static void si_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st)
{
	struct si_context *sctx = (struct si_context*)ctx;

	st->saved_compute = sctx->cs_shader_state.program;

	si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
	si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
}
1123
/* Emit the DB render state atom: DB_RENDER_CONTROL, DB_COUNT_CONTROL,
 * DB_RENDER_OVERRIDE2 and DB_SHADER_CONTROL.  The values depend on
 * depth/stencil decompression state, active occlusion queries, MSAA
 * smoothing, and the bound rasterizer. */
static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
	unsigned db_shader_control;

	/* Two consecutive registers: DB_RENDER_CONTROL, DB_COUNT_CONTROL. */
	radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);

	/* DB_RENDER_CONTROL: the copy mode takes precedence over in-place
	 * decompression, which takes precedence over normal rendering. */
	if (sctx->dbcb_depth_copy_enabled ||
	    sctx->dbcb_stencil_copy_enabled) {
		radeon_emit(cs,
			    S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
			    S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
			    S_028000_COPY_CENTROID(1) |
			    S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
	} else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
		radeon_emit(cs,
			    S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
			    S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
	} else {
		radeon_emit(cs,
			    S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
			    S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
	}

	/* DB_COUNT_CONTROL (occlusion queries) */
	if (sctx->b.num_occlusion_queries > 0 &&
	    !sctx->occlusion_queries_disabled) {
		bool perfect = sctx->b.num_perfect_occlusion_queries > 0;

		/* CIK+ has extra enable bits; SI only has the count mode. */
		if (sctx->b.chip_class >= CIK) {
			radeon_emit(cs,
				    S_028004_PERFECT_ZPASS_COUNTS(perfect) |
				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
				    S_028004_ZPASS_ENABLE(1) |
				    S_028004_SLICE_EVEN_ENABLE(1) |
				    S_028004_SLICE_ODD_ENABLE(1));
		} else {
			radeon_emit(cs,
				    S_028004_PERFECT_ZPASS_COUNTS(perfect) |
				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
		}
	} else {
		/* Disable occlusion queries. */
		if (sctx->b.chip_class >= CIK) {
			radeon_emit(cs, 0);
		} else {
			radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
		}
	}

	/* DB_RENDER_OVERRIDE2 */
	radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
		S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
		S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
		S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));

	db_shader_control = sctx->ps_db_shader_control;

	/* Bug workaround for smoothing (overrasterization) on SI. */
	if (sctx->b.chip_class == SI && sctx->smoothing_enabled) {
		db_shader_control &= C_02880C_Z_ORDER;
		db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
	}

	/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
	if (!rs || !rs->multisample_enable)
		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;

	/* NOTE(review): presumably an RB+ interaction specific to Stoney;
	 * confirm against the hardware docs before changing. */
	if (sctx->b.family == CHIP_STONEY &&
	    sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)
		db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);

	radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
			       db_shader_control);
}
1201
1202/*
1203 * format translation
1204 */
/* Translate a pipe format to a CB hardware color format (V_028C70_COLOR_*).
 * Returns V_028C70_COLOR_INVALID for formats the color buffer cannot
 * store.  Only the per-channel bit sizes matter here; the number format
 * and swizzle are programmed separately. */
static uint32_t si_translate_colorformat(enum pipe_format format)
{
	const struct util_format_description *desc = util_format_description(format);

/* NOTE(review): HAS_SIZE is never #undef'd, so it stays defined for the
 * rest of this translation unit — verify no later code relies on (or
 * collides with) it before scoping it. */
#define HAS_SIZE(x,y,z,w) \
	(desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
         desc->channel[2].size == (z) && desc->channel[3].size == (w))

	if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
		return V_028C70_COLOR_10_11_11;

	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
		return V_028C70_COLOR_INVALID;

	/* hw cannot support mixed formats (except depth/stencil, since
	 * stencil is not written to). */
	if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
		return V_028C70_COLOR_INVALID;

	switch (desc->nr_channels) {
	case 1:
		switch (desc->channel[0].size) {
		case 8:
			return V_028C70_COLOR_8;
		case 16:
			return V_028C70_COLOR_16;
		case 32:
			return V_028C70_COLOR_32;
		}
		break;
	case 2:
		if (desc->channel[0].size == desc->channel[1].size) {
			switch (desc->channel[0].size) {
			case 8:
				return V_028C70_COLOR_8_8;
			case 16:
				return V_028C70_COLOR_16_16;
			case 32:
				return V_028C70_COLOR_32_32;
			}
		} else if (HAS_SIZE(8,24,0,0)) {
			return V_028C70_COLOR_24_8;
		} else if (HAS_SIZE(24,8,0,0)) {
			return V_028C70_COLOR_8_24;
		}
		break;
	case 3:
		if (HAS_SIZE(5,6,5,0)) {
			return V_028C70_COLOR_5_6_5;
		} else if (HAS_SIZE(32,8,24,0)) {
			return V_028C70_COLOR_X24_8_32_FLOAT;
		}
		break;
	case 4:
		/* Uniform 4-channel sizes first, then the packed cases. */
		if (desc->channel[0].size == desc->channel[1].size &&
		    desc->channel[0].size == desc->channel[2].size &&
		    desc->channel[0].size == desc->channel[3].size) {
			switch (desc->channel[0].size) {
			case 4:
				return V_028C70_COLOR_4_4_4_4;
			case 8:
				return V_028C70_COLOR_8_8_8_8;
			case 16:
				return V_028C70_COLOR_16_16_16_16;
			case 32:
				return V_028C70_COLOR_32_32_32_32;
			}
		} else if (HAS_SIZE(5,5,5,1)) {
			return V_028C70_COLOR_1_5_5_5;
		} else if (HAS_SIZE(10,10,10,2)) {
			return V_028C70_COLOR_2_10_10_10;
		}
		break;
	}
	return V_028C70_COLOR_INVALID;
}
1281
1282static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
1283{
1284	if (SI_BIG_ENDIAN) {
1285		switch(colorformat) {
1286		/* 8-bit buffers. */
1287		case V_028C70_COLOR_8:
1288			return V_028C70_ENDIAN_NONE;
1289
1290		/* 16-bit buffers. */
1291		case V_028C70_COLOR_5_6_5:
1292		case V_028C70_COLOR_1_5_5_5:
1293		case V_028C70_COLOR_4_4_4_4:
1294		case V_028C70_COLOR_16:
1295		case V_028C70_COLOR_8_8:
1296			return V_028C70_ENDIAN_8IN16;
1297
1298		/* 32-bit buffers. */
1299		case V_028C70_COLOR_8_8_8_8:
1300		case V_028C70_COLOR_2_10_10_10:
1301		case V_028C70_COLOR_8_24:
1302		case V_028C70_COLOR_24_8:
1303		case V_028C70_COLOR_16_16:
1304			return V_028C70_ENDIAN_8IN32;
1305
1306		/* 64-bit buffers. */
1307		case V_028C70_COLOR_16_16_16_16:
1308			return V_028C70_ENDIAN_8IN16;
1309
1310		case V_028C70_COLOR_32_32:
1311			return V_028C70_ENDIAN_8IN32;
1312
1313		/* 128-bit buffers. */
1314		case V_028C70_COLOR_32_32_32_32:
1315			return V_028C70_ENDIAN_8IN32;
1316		default:
1317			return V_028C70_ENDIAN_NONE; /* Unsupported. */
1318		}
1319	} else {
1320		return V_028C70_ENDIAN_NONE;
1321	}
1322}
1323
1324static uint32_t si_translate_dbformat(enum pipe_format format)
1325{
1326	switch (format) {
1327	case PIPE_FORMAT_Z16_UNORM:
1328		return V_028040_Z_16;
1329	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1330	case PIPE_FORMAT_X8Z24_UNORM:
1331	case PIPE_FORMAT_Z24X8_UNORM:
1332	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1333		return V_028040_Z_24; /* deprecated on SI */
1334	case PIPE_FORMAT_Z32_FLOAT:
1335	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1336		return V_028040_Z_32_FLOAT;
1337	default:
1338		return V_028040_Z_INVALID;
1339	}
1340}
1341
1342/*
1343 * Texture translation
1344 */
1345
/* Translate a pipe format to an image data format (V_008F14_IMG_DATA_FORMAT_*)
 * for sampler/image resource descriptors.
 *
 * Handles depth/stencil, compressed (RGTC/ETC2/BPTC/S3TC), subsampled and
 * plain formats.  Returns ~0 if the format is not supported as a texture.
 * The caller passes the format description and the index of the first
 * non-void channel to avoid recomputing them. */
static uint32_t si_translate_texformat(struct pipe_screen *screen,
				       enum pipe_format format,
				       const struct util_format_description *desc,
				       int first_non_void)
{
	struct si_screen *sscreen = (struct si_screen*)screen;
	/* Compressed formats need kernel support (drm 2.31+ or any drm 3.x). */
	bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 &&
					  sscreen->b.info.drm_minor >= 31) ||
					 sscreen->b.info.drm_major == 3;
	bool uniform = true;
	int i;

	/* Colorspace (return non-RGB formats directly). */
	switch (desc->colorspace) {
	/* Depth stencil formats */
	case UTIL_FORMAT_COLORSPACE_ZS:
		switch (format) {
		case PIPE_FORMAT_Z16_UNORM:
			return V_008F14_IMG_DATA_FORMAT_16;
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_Z24X8_UNORM:
		case PIPE_FORMAT_Z24_UNORM_S8_UINT:
			return V_008F14_IMG_DATA_FORMAT_8_24;
		case PIPE_FORMAT_X8Z24_UNORM:
		case PIPE_FORMAT_S8X24_UINT:
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
			return V_008F14_IMG_DATA_FORMAT_24_8;
		case PIPE_FORMAT_S8_UINT:
			return V_008F14_IMG_DATA_FORMAT_8;
		case PIPE_FORMAT_Z32_FLOAT:
			return V_008F14_IMG_DATA_FORMAT_32;
		case PIPE_FORMAT_X32_S8X24_UINT:
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
			return V_008F14_IMG_DATA_FORMAT_X24_8_32;
		default:
			goto out_unknown;
		}

	case UTIL_FORMAT_COLORSPACE_YUV:
		goto out_unknown; /* TODO */

	case UTIL_FORMAT_COLORSPACE_SRGB:
		if (desc->nr_channels != 4 && desc->nr_channels != 1)
			goto out_unknown;
		break;

	default:
		break;
	}

	if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		switch (format) {
		case PIPE_FORMAT_RGTC1_SNORM:
		case PIPE_FORMAT_LATC1_SNORM:
		case PIPE_FORMAT_RGTC1_UNORM:
		case PIPE_FORMAT_LATC1_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BC4;
		case PIPE_FORMAT_RGTC2_SNORM:
		case PIPE_FORMAT_LATC2_SNORM:
		case PIPE_FORMAT_RGTC2_UNORM:
		case PIPE_FORMAT_LATC2_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BC5;
		default:
			goto out_unknown;
		}
	}

	/* ETC2 is only supported natively on Stoney here. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
	    sscreen->b.family == CHIP_STONEY) {
		switch (format) {
		case PIPE_FORMAT_ETC1_RGB8:
		case PIPE_FORMAT_ETC2_RGB8:
		case PIPE_FORMAT_ETC2_SRGB8:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
		case PIPE_FORMAT_ETC2_RGB8A1:
		case PIPE_FORMAT_ETC2_SRGB8A1:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
		case PIPE_FORMAT_ETC2_RGBA8:
		case PIPE_FORMAT_ETC2_SRGBA8:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
		case PIPE_FORMAT_ETC2_R11_UNORM:
		case PIPE_FORMAT_ETC2_R11_SNORM:
			return V_008F14_IMG_DATA_FORMAT_ETC2_R;
		case PIPE_FORMAT_ETC2_RG11_UNORM:
		case PIPE_FORMAT_ETC2_RG11_SNORM:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
		default:
			goto out_unknown;
		}
	}

	if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		switch (format) {
		case PIPE_FORMAT_BPTC_RGBA_UNORM:
		case PIPE_FORMAT_BPTC_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC7;
		case PIPE_FORMAT_BPTC_RGB_FLOAT:
		case PIPE_FORMAT_BPTC_RGB_UFLOAT:
			return V_008F14_IMG_DATA_FORMAT_BC6;
		default:
			goto out_unknown;
		}
	}

	if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
		switch (format) {
		case PIPE_FORMAT_R8G8_B8G8_UNORM:
		case PIPE_FORMAT_G8R8_B8R8_UNORM:
			return V_008F14_IMG_DATA_FORMAT_GB_GR;
		case PIPE_FORMAT_G8R8_G8B8_UNORM:
		case PIPE_FORMAT_R8G8_R8B8_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BG_RG;
		default:
			goto out_unknown;
		}
	}

	if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		/* S3TC additionally requires runtime support (patents). */
		if (!util_format_s3tc_enabled) {
			goto out_unknown;
		}

		switch (format) {
		case PIPE_FORMAT_DXT1_RGB:
		case PIPE_FORMAT_DXT1_RGBA:
		case PIPE_FORMAT_DXT1_SRGB:
		case PIPE_FORMAT_DXT1_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC1;
		case PIPE_FORMAT_DXT3_RGBA:
		case PIPE_FORMAT_DXT3_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC2;
		case PIPE_FORMAT_DXT5_RGBA:
		case PIPE_FORMAT_DXT5_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC3;
		default:
			goto out_unknown;
		}
	}

	if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
		return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
	} else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
		return V_008F14_IMG_DATA_FORMAT_10_11_11;
	}

	/* R8G8Bx_SNORM - TODO CxV8U8 */

	/* hw cannot support mixed formats (except depth/stencil, since only
	 * depth is read).*/
	if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
		goto out_unknown;

	/* See whether the components are of the same size. */
	for (i = 1; i < desc->nr_channels; i++) {
		uniform = uniform && desc->channel[0].size == desc->channel[i].size;
	}

	/* Non-uniform formats. */
	if (!uniform) {
		switch(desc->nr_channels) {
		case 3:
			if (desc->channel[0].size == 5 &&
			    desc->channel[1].size == 6 &&
			    desc->channel[2].size == 5) {
				return V_008F14_IMG_DATA_FORMAT_5_6_5;
			}
			goto out_unknown;
		case 4:
			if (desc->channel[0].size == 5 &&
			    desc->channel[1].size == 5 &&
			    desc->channel[2].size == 5 &&
			    desc->channel[3].size == 1) {
				return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
			}
			if (desc->channel[0].size == 10 &&
			    desc->channel[1].size == 10 &&
			    desc->channel[2].size == 10 &&
			    desc->channel[3].size == 2) {
				return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
			}
			goto out_unknown;
		}
		goto out_unknown;
	}

	/* All channels must be void beyond this point (invalid input). */
	if (first_non_void < 0 || first_non_void > 3)
		goto out_unknown;

	/* uniform formats */
	switch (desc->channel[first_non_void].size) {
	case 4:
		switch (desc->nr_channels) {
#if 0 /* Not supported for render targets */
		case 2:
			return V_008F14_IMG_DATA_FORMAT_4_4;
#endif
		case 4:
			return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
		}
		break;
	case 8:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_8;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_8_8;
		case 4:
			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
		}
		break;
	case 16:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_16;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_16_16;
		case 4:
			return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
		}
		break;
	case 32:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_32;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_32_32;
#if 0 /* Not supported for render targets */
		case 3:
			return V_008F14_IMG_DATA_FORMAT_32_32_32;
#endif
		case 4:
			return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
		}
	}

out_unknown:
	/* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
	return ~0;
}
1594
1595static unsigned si_tex_wrap(unsigned wrap)
1596{
1597	switch (wrap) {
1598	default:
1599	case PIPE_TEX_WRAP_REPEAT:
1600		return V_008F30_SQ_TEX_WRAP;
1601	case PIPE_TEX_WRAP_CLAMP:
1602		return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
1603	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1604		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
1605	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1606		return V_008F30_SQ_TEX_CLAMP_BORDER;
1607	case PIPE_TEX_WRAP_MIRROR_REPEAT:
1608		return V_008F30_SQ_TEX_MIRROR;
1609	case PIPE_TEX_WRAP_MIRROR_CLAMP:
1610		return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
1611	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1612		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
1613	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1614		return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
1615	}
1616}
1617
1618static unsigned si_tex_mipfilter(unsigned filter)
1619{
1620	switch (filter) {
1621	case PIPE_TEX_MIPFILTER_NEAREST:
1622		return V_008F38_SQ_TEX_Z_FILTER_POINT;
1623	case PIPE_TEX_MIPFILTER_LINEAR:
1624		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
1625	default:
1626	case PIPE_TEX_MIPFILTER_NONE:
1627		return V_008F38_SQ_TEX_Z_FILTER_NONE;
1628	}
1629}
1630
1631static unsigned si_tex_compare(unsigned compare)
1632{
1633	switch (compare) {
1634	default:
1635	case PIPE_FUNC_NEVER:
1636		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
1637	case PIPE_FUNC_LESS:
1638		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
1639	case PIPE_FUNC_EQUAL:
1640		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
1641	case PIPE_FUNC_LEQUAL:
1642		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
1643	case PIPE_FUNC_GREATER:
1644		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
1645	case PIPE_FUNC_NOTEQUAL:
1646		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
1647	case PIPE_FUNC_GEQUAL:
1648		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
1649	case PIPE_FUNC_ALWAYS:
1650		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
1651	}
1652}
1653
1654static unsigned si_tex_dim(unsigned res_target, unsigned view_target,
1655			   unsigned nr_samples)
1656{
1657	if (view_target == PIPE_TEXTURE_CUBE ||
1658	    view_target == PIPE_TEXTURE_CUBE_ARRAY)
1659		res_target = view_target;
1660	/* If interpreting cubemaps as something else, set 2D_ARRAY. */
1661	else if (res_target == PIPE_TEXTURE_CUBE ||
1662		 res_target == PIPE_TEXTURE_CUBE_ARRAY)
1663		res_target = PIPE_TEXTURE_2D_ARRAY;
1664
1665	switch (res_target) {
1666	default:
1667	case PIPE_TEXTURE_1D:
1668		return V_008F1C_SQ_RSRC_IMG_1D;
1669	case PIPE_TEXTURE_1D_ARRAY:
1670		return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
1671	case PIPE_TEXTURE_2D:
1672	case PIPE_TEXTURE_RECT:
1673		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
1674					V_008F1C_SQ_RSRC_IMG_2D;
1675	case PIPE_TEXTURE_2D_ARRAY:
1676		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
1677					V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
1678	case PIPE_TEXTURE_3D:
1679		return V_008F1C_SQ_RSRC_IMG_3D;
1680	case PIPE_TEXTURE_CUBE:
1681	case PIPE_TEXTURE_CUBE_ARRAY:
1682		return V_008F1C_SQ_RSRC_IMG_CUBE;
1683	}
1684}
1685
1686/*
1687 * Format support testing
1688 */
1689
1690static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
1691{
1692	return si_translate_texformat(screen, format, util_format_description(format),
1693				      util_format_get_first_non_void_channel(format)) != ~0U;
1694}
1695
/* Translate a pipe format to a buffer data format (BUF_DATA_FORMAT_*) for
 * vertex/texel-buffer descriptors.  Returns
 * V_008F0C_BUF_DATA_FORMAT_INVALID for formats the buffer unit cannot
 * fetch (fixed-point, non-uniform channel sizes, unconvertible 32-bit
 * types). */
static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
					       const struct util_format_description *desc,
					       int first_non_void)
{
	unsigned type;
	int i;

	/* Packed 10/11/11 float isn't plain; handle it up front. */
	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
		return V_008F0C_BUF_DATA_FORMAT_10_11_11;

	assert(first_non_void >= 0);
	type = desc->channel[first_non_void].type;

	if (type == UTIL_FORMAT_TYPE_FIXED)
		return V_008F0C_BUF_DATA_FORMAT_INVALID;

	if (desc->nr_channels == 4 &&
	    desc->channel[0].size == 10 &&
	    desc->channel[1].size == 10 &&
	    desc->channel[2].size == 10 &&
	    desc->channel[3].size == 2)
		return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;

	/* See whether the components are of the same size. */
	for (i = 0; i < desc->nr_channels; i++) {
		if (desc->channel[first_non_void].size != desc->channel[i].size)
			return V_008F0C_BUF_DATA_FORMAT_INVALID;
	}

	switch (desc->channel[first_non_void].size) {
	case 8:
		switch (desc->nr_channels) {
		case 1:
			return V_008F0C_BUF_DATA_FORMAT_8;
		case 2:
			return V_008F0C_BUF_DATA_FORMAT_8_8;
		/* No native 3-channel 8/16-bit formats; use the 4-channel one. */
		case 3:
		case 4:
			return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
		}
		break;
	case 16:
		switch (desc->nr_channels) {
		case 1:
			return V_008F0C_BUF_DATA_FORMAT_16;
		case 2:
			return V_008F0C_BUF_DATA_FORMAT_16_16;
		case 3:
		case 4:
			return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
		}
		break;
	case 32:
		/* From the Southern Islands ISA documentation about MTBUF:
		 * 'Memory reads of data in memory that is 32 or 64 bits do not
		 * undergo any format conversion.'
		 */
		if (type != UTIL_FORMAT_TYPE_FLOAT &&
		    !desc->channel[first_non_void].pure_integer)
			return V_008F0C_BUF_DATA_FORMAT_INVALID;

		switch (desc->nr_channels) {
		case 1:
			return V_008F0C_BUF_DATA_FORMAT_32;
		case 2:
			return V_008F0C_BUF_DATA_FORMAT_32_32;
		case 3:
			return V_008F0C_BUF_DATA_FORMAT_32_32_32;
		case 4:
			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
		}
		break;
	}

	return V_008F0C_BUF_DATA_FORMAT_INVALID;
}
1772
1773static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
1774					      const struct util_format_description *desc,
1775					      int first_non_void)
1776{
1777	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1778		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1779
1780	assert(first_non_void >= 0);
1781
1782	switch (desc->channel[first_non_void].type) {
1783	case UTIL_FORMAT_TYPE_SIGNED:
1784		if (desc->channel[first_non_void].normalized)
1785			return V_008F0C_BUF_NUM_FORMAT_SNORM;
1786		else if (desc->channel[first_non_void].pure_integer)
1787			return V_008F0C_BUF_NUM_FORMAT_SINT;
1788		else
1789			return V_008F0C_BUF_NUM_FORMAT_SSCALED;
1790		break;
1791	case UTIL_FORMAT_TYPE_UNSIGNED:
1792		if (desc->channel[first_non_void].normalized)
1793			return V_008F0C_BUF_NUM_FORMAT_UNORM;
1794		else if (desc->channel[first_non_void].pure_integer)
1795			return V_008F0C_BUF_NUM_FORMAT_UINT;
1796		else
1797			return V_008F0C_BUF_NUM_FORMAT_USCALED;
1798		break;
1799	case UTIL_FORMAT_TYPE_FLOAT:
1800	default:
1801		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1802	}
1803}
1804
/* Check which of the requested buffer bindings (vertex buffer, sampler
 * view, shader image) a format supports.  Returns the supported subset of
 * 'usage' (0 if unsupported). */
static unsigned si_is_vertex_format_supported(struct pipe_screen *screen,
					      enum pipe_format format,
					      unsigned usage)
{
	const struct util_format_description *desc;
	int first_non_void;
	unsigned data_format;

	assert((usage & ~(PIPE_BIND_SHADER_IMAGE |
			  PIPE_BIND_SAMPLER_VIEW |
			  PIPE_BIND_VERTEX_BUFFER)) == 0);

	desc = util_format_description(format);

	/* There are no native 8_8_8 or 16_16_16 data formats, and we currently
	 * select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well
	 * for read-only access (with caveats surrounding bounds checks), but
	 * obviously fails for write access which we have to implement for
	 * shader images. Luckily, OpenGL doesn't expect this to be supported
	 * anyway, and so the only impact is on PBO uploads / downloads, which
	 * shouldn't be expected to be fast for GL_RGB anyway.
	 */
	if (desc->block.bits == 3 * 8 ||
	    desc->block.bits == 3 * 16) {
		if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) {
		    usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW);
			if (!usage)
				return 0;
		}
	}

	/* All remaining bindings require a valid buffer data format. */
	first_non_void = util_format_get_first_non_void_channel(format);
	data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);
	if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID)
		return 0;

	return usage;
}
1843
1844static bool si_is_colorbuffer_format_supported(enum pipe_format format)
1845{
1846	return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
1847		r600_translate_colorswap(format, false) != ~0U;
1848}
1849
1850static bool si_is_zs_format_supported(enum pipe_format format)
1851{
1852	return si_translate_dbformat(format) != V_028040_Z_INVALID;
1853}
1854
/* Implements pipe_screen::is_format_supported.
 *
 * "usage" is a mask of PIPE_BIND_* flags the caller requests. Each flag
 * that is actually supported for (format, target, sample_count) is
 * accumulated into "retval"; the format is reported as supported only if
 * every requested flag was granted (retval == usage).
 */
static boolean si_is_format_supported(struct pipe_screen *screen,
				      enum pipe_format format,
				      enum pipe_texture_target target,
				      unsigned sample_count,
				      unsigned usage)
{
	unsigned retval = 0;

	if (target >= PIPE_MAX_TEXTURE_TYPES) {
		R600_ERR("r600: unsupported texture type %d\n", target);
		return false;
	}

	if (!util_format_is_supported(format, usage))
		return false;

	if (sample_count > 1) {
		if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
			return false;

		/* MSAA resources are not supported for shader image stores. */
		if (usage & PIPE_BIND_SHADER_IMAGE)
			return false;

		switch (sample_count) {
		case 2:
		case 4:
		case 8:
			break;
		case 16:
			/* 16x is only supported for framebuffers without
			 * attachments (PIPE_FORMAT_NONE).
			 */
			if (format == PIPE_FORMAT_NONE)
				return true;
			else
				return false;
		default:
			return false;
		}
	}

	if (usage & (PIPE_BIND_SAMPLER_VIEW |
		     PIPE_BIND_SHADER_IMAGE)) {
		if (target == PIPE_BUFFER) {
			/* Buffer textures go through the typed-buffer path,
			 * which may grant only a subset of the two flags.
			 */
			retval |= si_is_vertex_format_supported(
				screen, format, usage & (PIPE_BIND_SAMPLER_VIEW |
						         PIPE_BIND_SHADER_IMAGE));
		} else {
			if (si_is_sampler_format_supported(screen, format))
				retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
						   PIPE_BIND_SHADER_IMAGE);
		}
	}

	if ((usage & (PIPE_BIND_RENDER_TARGET |
		      PIPE_BIND_DISPLAY_TARGET |
		      PIPE_BIND_SCANOUT |
		      PIPE_BIND_SHARED |
		      PIPE_BIND_BLENDABLE)) &&
	    si_is_colorbuffer_format_supported(format)) {
		retval |= usage &
			  (PIPE_BIND_RENDER_TARGET |
			   PIPE_BIND_DISPLAY_TARGET |
			   PIPE_BIND_SCANOUT |
			   PIPE_BIND_SHARED);
		/* Blending is only meaningful for non-integer color formats. */
		if (!util_format_is_pure_integer(format) &&
		    !util_format_is_depth_or_stencil(format))
			retval |= usage & PIPE_BIND_BLENDABLE;
	}

	if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
	    si_is_zs_format_supported(format)) {
		retval |= PIPE_BIND_DEPTH_STENCIL;
	}

	if (usage & PIPE_BIND_VERTEX_BUFFER) {
		retval |= si_is_vertex_format_supported(screen, format,
							PIPE_BIND_VERTEX_BUFFER);
	}

	/* Linear layout is not supported for compressed or depth/stencil
	 * resources here.
	 */
	if ((usage & PIPE_BIND_LINEAR) &&
	    !util_format_is_compressed(format) &&
	    !(usage & PIPE_BIND_DEPTH_STENCIL))
		retval |= PIPE_BIND_LINEAR;

	return retval == usage;
}
1939
1940/*
1941 * framebuffer handling
1942 */
1943
/* Choose the SPI pixel shader export formats for a color surface.
 *
 * Four variants are computed and stored on the surface: "normal", "alpha"
 * (export includes alpha), "blend" (supports blending), and "blend_alpha"
 * (both). format/swap/ntype are CB_COLOR*_INFO-style hardware values;
 * is_depth marks a surface used for the DB->CB copy, which always needs
 * the full 32_ABGR export.
 */
static void si_choose_spi_color_formats(struct r600_surface *surf,
					unsigned format, unsigned swap,
					unsigned ntype, bool is_depth)
{
	/* Alpha is needed for alpha-to-coverage.
	 * Blending may be with or without alpha.
	 */
	unsigned normal = 0; /* most optimal, may not support blending or export alpha */
	unsigned alpha = 0; /* exports alpha, but may not support blending */
	unsigned blend = 0; /* supports blending, but may not export alpha */
	unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */

	/* Choose the SPI color formats. These are required values for Stoney/RB+.
	 * Other chips have multiple choices, though they are not necessarily better.
	 */
	switch (format) {
	case V_028C70_COLOR_5_6_5:
	case V_028C70_COLOR_1_5_5_5:
	case V_028C70_COLOR_5_5_5_1:
	case V_028C70_COLOR_4_4_4_4:
	case V_028C70_COLOR_10_11_11:
	case V_028C70_COLOR_11_11_10:
	case V_028C70_COLOR_8:
	case V_028C70_COLOR_8_8:
	case V_028C70_COLOR_8_8_8_8:
	case V_028C70_COLOR_10_10_10_2:
	case V_028C70_COLOR_2_10_10_10:
		/* <= 10 bits per channel: one 16-bit export covers all needs. */
		if (ntype == V_028C70_NUMBER_UINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_SINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
		else
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
		break;

	case V_028C70_COLOR_16:
	case V_028C70_COLOR_16_16:
	case V_028C70_COLOR_16_16_16_16:
		if (ntype == V_028C70_NUMBER_UNORM ||
		    ntype == V_028C70_NUMBER_SNORM) {
			/* UNORM16 and SNORM16 don't support blending */
			if (ntype == V_028C70_NUMBER_UNORM)
				normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
			else
				normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;

			/* Use 32 bits per channel for blending. */
			if (format == V_028C70_COLOR_16) {
				if (swap == V_028C70_SWAP_STD) { /* R */
					blend = V_028714_SPI_SHADER_32_R;
					blend_alpha = V_028714_SPI_SHADER_32_AR;
				} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
				else
					assert(0);
			} else if (format == V_028C70_COLOR_16_16) {
				if (swap == V_028C70_SWAP_STD) { /* RG */
					blend = V_028714_SPI_SHADER_32_GR;
					blend_alpha = V_028714_SPI_SHADER_32_ABGR;
				} else if (swap == V_028C70_SWAP_ALT) /* RA */
					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
				else
					assert(0);
			} else /* 16_16_16_16 */
				blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
		} else if (ntype == V_028C70_NUMBER_UINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_SINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_FLOAT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32:
		if (swap == V_028C70_SWAP_STD) { /* R */
			blend = normal = V_028714_SPI_SHADER_32_R;
			alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
		} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32_32:
		if (swap == V_028C70_SWAP_STD) { /* RG */
			blend = normal = V_028714_SPI_SHADER_32_GR;
			alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
		} else if (swap == V_028C70_SWAP_ALT) /* RA */
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32_32_32_32:
	case V_028C70_COLOR_8_24:
	case V_028C70_COLOR_24_8:
	case V_028C70_COLOR_X24_8_32_FLOAT:
		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
		break;

	default:
		assert(0);
		return;
	}

	/* The DB->CB copy needs 32_ABGR. */
	if (is_depth)
		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;

	surf->spi_shader_col_format = normal;
	surf->spi_shader_col_format_alpha = alpha;
	surf->spi_shader_col_format_blend = blend;
	surf->spi_shader_col_format_blend_alpha = blend_alpha;
}
2060
/* Compute the immutable CB register values for a color surface
 * (cb_color_info/attrib/view, DCC control, SPI export formats).
 * Runs once per surface; the result is cached via surf->color_initialized.
 */
static void si_initialize_color_surface(struct si_context *sctx,
					struct r600_surface *surf)
{
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned color_info, color_attrib, color_view;
	unsigned format, swap, ntype, endian;
	const struct util_format_description *desc;
	int i;
	unsigned blend_clamp = 0, blend_bypass = 0;

	color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
		     S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);

	/* Derive the hardware number type from the first non-void channel. */
	desc = util_format_description(surf->base.format);
	for (i = 0; i < 4; i++) {
		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
			break;
		}
	}
	if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
		ntype = V_028C70_NUMBER_FLOAT;
	} else {
		ntype = V_028C70_NUMBER_UNORM;
		if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
			ntype = V_028C70_NUMBER_SRGB;
		else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
			if (desc->channel[i].pure_integer) {
				ntype = V_028C70_NUMBER_SINT;
			} else {
				assert(desc->channel[i].normalized);
				ntype = V_028C70_NUMBER_SNORM;
			}
		} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
			if (desc->channel[i].pure_integer) {
				ntype = V_028C70_NUMBER_UINT;
			} else {
				assert(desc->channel[i].normalized);
				ntype = V_028C70_NUMBER_UNORM;
			}
		}
	}

	format = si_translate_colorformat(surf->base.format);
	if (format == V_028C70_COLOR_INVALID) {
		R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
	}
	assert(format != V_028C70_COLOR_INVALID);
	swap = r600_translate_colorswap(surf->base.format, false);
	endian = si_colorformat_endian_swap(format);

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	   8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}

	/* Remember 8-bit integer formats; shaders need a matching export. */
	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
	    (format == V_028C70_COLOR_8 ||
	     format == V_028C70_COLOR_8_8 ||
	     format == V_028C70_COLOR_8_8_8_8))
		surf->color_is_int8 = true;

	color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_SIMPLE_FLOAT(1) |
		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
				    ntype != V_028C70_NUMBER_SNORM &&
				    ntype != V_028C70_NUMBER_SRGB &&
				    format != V_028C70_COLOR_8_24 &&
				    format != V_028C70_COLOR_24_8) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);

	/* Intensity is implemented as Red, so treat it that way. */
	color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 ||
						  util_format_is_intensity(surf->base.format));

	if (rtex->resource.b.b.nr_samples > 1) {
		unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);

		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
				S_028C74_NUM_FRAGMENTS(log_samples);

		if (rtex->fmask.size) {
			color_info |= S_028C70_COMPRESSION(1);
			unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);

			if (sctx->b.chip_class == SI) {
				/* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
				color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
			}
		}
	}

	surf->cb_color_view = color_view;
	surf->cb_color_info = color_info;
	surf->cb_color_attrib = color_attrib;

	if (sctx->b.chip_class >= VI) {
		/* DCC control: block size index depends on bpe for MSAA. */
		unsigned max_uncompressed_block_size = 2;

		if (rtex->resource.b.b.nr_samples > 1) {
			if (rtex->surface.bpe == 1)
				max_uncompressed_block_size = 0;
			else if (rtex->surface.bpe == 2)
				max_uncompressed_block_size = 1;
		}

		surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
		                       S_028C78_INDEPENDENT_64B_BLOCKS(1);
	}

	/* This must be set for fast clear to work without FMASK. */
	if (!rtex->fmask.size && sctx->b.chip_class == SI) {
		unsigned bankh = util_logbase2(rtex->surface.bankh);
		surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
	}

	/* Determine pixel shader export format */
	si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);

	surf->color_initialized = true;
}
2195
/* Compute the immutable DB register values for a depth/stencil surface
 * (db_depth_info, db_z_info, db_stencil_info, base addresses, HTILE setup).
 * Runs once per surface; the result is cached via surf->depth_initialized.
 */
static void si_init_depth_surface(struct si_context *sctx,
				  struct r600_surface *surf)
{
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned level = surf->base.u.tex.level;
	struct radeon_surf_level *levelinfo = &rtex->surface.level[level];
	unsigned format;
	uint32_t z_info, s_info, db_depth_info;
	uint64_t z_offs, s_offs;
	uint32_t db_htile_data_base, db_htile_surface;

	format = si_translate_dbformat(rtex->db_render_format);

	if (format == V_028040_Z_INVALID) {
		R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
	}
	assert(format != V_028040_Z_INVALID);

	/* Depth and stencil live at different offsets within the resource. */
	s_offs = z_offs = rtex->resource.gpu_address;
	z_offs += rtex->surface.level[level].offset;
	s_offs += rtex->surface.stencil_level[level].offset;

	db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile);

	z_info = S_028040_FORMAT(format);
	if (rtex->resource.b.b.nr_samples > 1) {
		z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
	}

	if (rtex->surface.flags & RADEON_SURF_SBUFFER)
		s_info = S_028044_FORMAT(V_028044_STENCIL_8);
	else
		s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);

	if (sctx->b.chip_class >= CIK) {
		/* CIK+: tiling parameters come from the tile mode arrays. */
		struct radeon_info *info = &sctx->screen->b.info;
		unsigned index = rtex->surface.tiling_index[level];
		unsigned stencil_index = rtex->surface.stencil_tiling_index[level];
		unsigned macro_index = rtex->surface.macro_tile_index;
		unsigned tile_mode = info->si_tile_mode_array[index];
		unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
		unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

		db_depth_info |=
			S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
			S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
			S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
			S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
			S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
			S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
		z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
		s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
	} else {
		/* SI: tiling is selected via a tile mode index. */
		unsigned tile_mode_index = si_tile_mode_index(rtex, level, false);
		z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
		tile_mode_index = si_tile_mode_index(rtex, level, true);
		s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
	}

	/* HiZ aka depth buffer htile */
	/* use htile only for first level */
	if (rtex->htile_buffer && !level) {
		z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
			  S_028040_ALLOW_EXPCLEAR(1);

		if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
			/* Workaround: For a not yet understood reason, the
			 * combination of MSAA, fast stencil clear and stencil
			 * decompress messes with subsequent stencil buffer
			 * uses. Problem was reproduced on Verde, Bonaire,
			 * Tonga, and Carrizo.
			 *
			 * Disabling EXPCLEAR works around the problem.
			 *
			 * Check piglit's arb_texture_multisample-stencil-clear
			 * test if you want to try changing this.
			 */
			if (rtex->resource.b.b.nr_samples <= 1)
				s_info |= S_028044_ALLOW_EXPCLEAR(1);
		} else if (!rtex->tc_compatible_htile) {
			/* Use all of the htile_buffer for depth if there's no stencil.
			 * This must not be set when TC-compatible HTILE is enabled
			 * due to a hw bug.
			 */
			s_info |= S_028044_TILE_STENCIL_DISABLE(1);
		}

		uint64_t va = rtex->htile_buffer->gpu_address;
		db_htile_data_base = va >> 8;
		db_htile_surface = S_028ABC_FULL_CACHE(1);

		if (rtex->tc_compatible_htile) {
			db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);

			/* Fewer zplanes before decompress at higher sample counts. */
			switch (rtex->resource.b.b.nr_samples) {
			case 0:
			case 1:
				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
				break;
			case 2:
			case 4:
				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
				break;
			case 8:
				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
				break;
			default:
				assert(0);
			}
		}
	} else {
		db_htile_data_base = 0;
		db_htile_surface = 0;
	}

	assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);

	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
			      S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
	surf->db_htile_data_base = db_htile_data_base;
	surf->db_depth_info = db_depth_info;
	surf->db_z_info = z_info;
	surf->db_stencil_info = s_info;
	surf->db_depth_base = z_offs >> 8;
	surf->db_stencil_base = s_offs >> 8;
	surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
			      S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
	surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
							levelinfo->nblk_y) / 64 - 1);
	surf->db_htile_surface = db_htile_surface;

	surf->depth_initialized = true;
}
2329
2330static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
2331{
2332	for (int i = 0; i < state->nr_cbufs; ++i) {
2333		struct r600_surface *surf = NULL;
2334		struct r600_texture *rtex;
2335
2336		if (!state->cbufs[i])
2337			continue;
2338		surf = (struct r600_surface*)state->cbufs[i];
2339		rtex = (struct r600_texture*)surf->base.texture;
2340
2341		p_atomic_dec(&rtex->framebuffers_bound);
2342	}
2343}
2344
/* Implements pipe_context::set_framebuffer_state.
 *
 * Binds the new framebuffer, lazily initializes CB/DB register values for
 * surfaces seen for the first time, recomputes per-framebuffer derived
 * state (SPI export formats, compressed CB mask, sample counts), and
 * dirties all atoms that depend on the framebuffer.
 */
static void si_set_framebuffer_state(struct pipe_context *ctx,
				     const struct pipe_framebuffer_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_constant_buffer constbuf = {0};
	struct r600_surface *surf = NULL;
	struct r600_texture *rtex;
	bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
	int i;

	/* Stop DCC statistics queries on the outgoing colorbuffers. */
	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
		if (!sctx->framebuffer.state.cbufs[i])
			continue;

		rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
		if (rtex->dcc_gather_statistics)
			vi_separate_dcc_stop_query(ctx, rtex);
	}

	/* Only flush TC when changing the framebuffer state, because
	 * the only client not using TC that can change textures is
	 * the framebuffer.
	 *
	 * Flush all CB and DB caches here because all buffers can be used
	 * for write by both TC (with shader image stores) and CB/DB.
	 */
	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
			 SI_CONTEXT_INV_GLOBAL_L2 |
			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
			 SI_CONTEXT_CS_PARTIAL_FLUSH;

	/* Take the maximum of the old and new count. If the new count is lower,
	 * dirtying is needed to disable the unbound colorbuffers.
	 */
	sctx->framebuffer.dirty_cbufs |=
		(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
	sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;

	si_dec_framebuffer_counters(&sctx->framebuffer.state);
	util_copy_framebuffer_state(&sctx->framebuffer.state, state);

	/* Reset the derived state before re-accumulating it below. */
	sctx->framebuffer.colorbuf_enabled_4bit = 0;
	sctx->framebuffer.spi_shader_col_format = 0;
	sctx->framebuffer.spi_shader_col_format_alpha = 0;
	sctx->framebuffer.spi_shader_col_format_blend = 0;
	sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
	sctx->framebuffer.color_is_int8 = 0;

	sctx->framebuffer.compressed_cb_mask = 0;
	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
	sctx->framebuffer.any_dst_linear = false;

	for (i = 0; i < state->nr_cbufs; i++) {
		if (!state->cbufs[i])
			continue;

		surf = (struct r600_surface*)state->cbufs[i];
		rtex = (struct r600_texture*)surf->base.texture;

		if (!surf->color_initialized) {
			si_initialize_color_surface(sctx, surf);
		}

		/* Pack per-colorbuffer state into 4-bit-per-buffer masks. */
		sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4);
		sctx->framebuffer.spi_shader_col_format |=
			surf->spi_shader_col_format << (i * 4);
		sctx->framebuffer.spi_shader_col_format_alpha |=
			surf->spi_shader_col_format_alpha << (i * 4);
		sctx->framebuffer.spi_shader_col_format_blend |=
			surf->spi_shader_col_format_blend << (i * 4);
		sctx->framebuffer.spi_shader_col_format_blend_alpha |=
			surf->spi_shader_col_format_blend_alpha << (i * 4);

		if (surf->color_is_int8)
			sctx->framebuffer.color_is_int8 |= 1 << i;

		if (rtex->fmask.size) {
			sctx->framebuffer.compressed_cb_mask |= 1 << i;
		}

		if (rtex->surface.is_linear)
			sctx->framebuffer.any_dst_linear = true;

		r600_context_add_resource_size(ctx, surf->base.texture);

		p_atomic_inc(&rtex->framebuffers_bound);

		if (rtex->dcc_gather_statistics) {
			/* Dirty tracking must be enabled for DCC usage analysis. */
			sctx->framebuffer.compressed_cb_mask |= 1 << i;
			vi_separate_dcc_start_query(ctx, rtex);
		}
	}

	if (state->zsbuf) {
		surf = (struct r600_surface*)state->zsbuf;
		rtex = (struct r600_texture*)surf->base.texture;

		if (!surf->depth_initialized) {
			si_init_depth_surface(sctx, surf);
		}
		r600_context_add_resource_size(ctx, surf->base.texture);
	}

	si_update_poly_offset_state(sctx);
	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
	si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);

	if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
		si_mark_atom_dirty(sctx, &sctx->msaa_config);

	if (sctx->framebuffer.nr_samples != old_nr_samples) {
		si_mark_atom_dirty(sctx, &sctx->msaa_config);
		si_mark_atom_dirty(sctx, &sctx->db_render_state);

		/* Set sample locations as fragment shader constants. */
		switch (sctx->framebuffer.nr_samples) {
		case 1:
			constbuf.user_buffer = sctx->b.sample_locations_1x;
			break;
		case 2:
			constbuf.user_buffer = sctx->b.sample_locations_2x;
			break;
		case 4:
			constbuf.user_buffer = sctx->b.sample_locations_4x;
			break;
		case 8:
			constbuf.user_buffer = sctx->b.sample_locations_8x;
			break;
		case 16:
			constbuf.user_buffer = sctx->b.sample_locations_16x;
			break;
		default:
			R600_ERR("Requested an invalid number of samples %i.\n",
				 sctx->framebuffer.nr_samples);
			assert(0);
		}
		constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
		si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);

		si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
	}

	sctx->need_check_render_feedback = true;
	sctx->do_update_shaders = true;
}
2493
/* Emit the CB and DB registers for the currently bound framebuffer into
 * the command stream. Only colorbuffers flagged in dirty_cbufs and the
 * zsbuf (if dirty_zsbuf) are re-emitted; the dirty flags are cleared at
 * the end.
 */
static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
	unsigned i, nr_cbufs = state->nr_cbufs;
	struct r600_texture *tex = NULL;
	struct r600_surface *cb = NULL;
	unsigned cb_color_info = 0;

	/* Colorbuffers. */
	for (i = 0; i < nr_cbufs; i++) {
		const struct radeon_surf_level *level_info;
		unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
		unsigned cb_color_base, cb_color_fmask, cb_color_attrib;
		unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice;

		if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
			continue;

		cb = (struct r600_surface*)state->cbufs[i];
		if (!cb) {
			/* Unbound slot: disable the colorbuffer via its format. */
			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
			continue;
		}

		/* Register all BOs the CB can touch with the winsys. */
		tex = (struct r600_texture *)cb->base.texture;
		level_info =  &tex->surface.level[cb->base.u.tex.level];
		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
				      &tex->resource, RADEON_USAGE_READWRITE,
				      tex->resource.b.b.nr_samples > 1 ?
					      RADEON_PRIO_COLOR_BUFFER_MSAA :
					      RADEON_PRIO_COLOR_BUFFER);

		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
				tex->cmask_buffer, RADEON_USAGE_READWRITE,
				RADEON_PRIO_CMASK);
		}

		if (tex->dcc_separate_buffer)
			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
						  tex->dcc_separate_buffer,
						  RADEON_USAGE_READWRITE,
						  RADEON_PRIO_DCC);

		/* Compute mutable surface parameters. */
		pitch_tile_max = level_info->nblk_x / 8 - 1;
		slice_tile_max = level_info->nblk_x *
				 level_info->nblk_y / 64 - 1;
		tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false);

		cb_color_base = (tex->resource.gpu_address + level_info->offset) >> 8;
		cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
		cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
		cb_color_attrib = cb->cb_color_attrib |
				  S_028C74_TILE_MODE_INDEX(tile_mode_index);

		if (tex->fmask.size) {
			if (sctx->b.chip_class >= CIK)
				cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->fmask.pitch_in_pixels / 8 - 1);
			cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->fmask.tile_mode_index);
			cb_color_fmask = (tex->resource.gpu_address + tex->fmask.offset) >> 8;
			cb_color_fmask_slice = S_028C88_TILE_MAX(tex->fmask.slice_tile_max);
		} else {
			/* This must be set for fast clear to work without FMASK. */
			if (sctx->b.chip_class >= CIK)
				cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
			cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
			cb_color_fmask = cb_color_base;
			cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
		}

		cb_color_info = cb->cb_color_info | tex->cb_color_info;

		if (tex->dcc_offset && cb->base.u.tex.level < tex->surface.num_dcc_levels) {
			/* Disable DCC for the destination of an MSAA resolve
			 * (cbufs[0] = MSAA source, cbufs[1] = resolve dst).
			 */
			bool is_msaa_resolve_dst = state->cbufs[0] &&
						   state->cbufs[0]->texture->nr_samples > 1 &&
						   state->cbufs[1] == &cb->base &&
						   state->cbufs[1]->texture->nr_samples <= 1;

			if (!is_msaa_resolve_dst)
				cb_color_info |= S_028C70_DCC_ENABLE(1);
		}

		/* Emit the contiguous CB_COLOR0_* register block. */
		radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
					   sctx->b.chip_class >= VI ? 14 : 13);
		radeon_emit(cs, cb_color_base);		/* R_028C60_CB_COLOR0_BASE */
		radeon_emit(cs, cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		radeon_emit(cs, cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		radeon_emit(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		radeon_emit(cs, cb_color_info);		/* R_028C70_CB_COLOR0_INFO */
		radeon_emit(cs, cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		radeon_emit(cs, cb->cb_dcc_control);	/* R_028C78_CB_COLOR0_DCC_CONTROL */
		radeon_emit(cs, tex->cmask.base_address_reg);	/* R_028C7C_CB_COLOR0_CMASK */
		radeon_emit(cs, tex->cmask.slice_tile_max);	/* R_028C80_CB_COLOR0_CMASK_SLICE */
		radeon_emit(cs, cb_color_fmask);		/* R_028C84_CB_COLOR0_FMASK */
		radeon_emit(cs, cb_color_fmask_slice);		/* R_028C88_CB_COLOR0_FMASK_SLICE */
		radeon_emit(cs, tex->color_clear_value[0]);	/* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
		radeon_emit(cs, tex->color_clear_value[1]);	/* R_028C90_CB_COLOR0_CLEAR_WORD1 */

		if (sctx->b.chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */
			radeon_emit(cs, ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
					 tex->dcc_offset +
				         tex->surface.level[cb->base.u.tex.level].dcc_offset) >> 8);
	}
	/* Disable the remaining (unbound) colorbuffer slots. */
	for (; i < 8 ; i++)
		if (sctx->framebuffer.dirty_cbufs & (1 << i))
			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);

	/* ZS buffer. */
	if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;

		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
				      &rtex->resource, RADEON_USAGE_READWRITE,
				      zb->base.texture->nr_samples > 1 ?
					      RADEON_PRIO_DEPTH_BUFFER_MSAA :
					      RADEON_PRIO_DEPTH_BUFFER);

		if (zb->db_htile_data_base) {
			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
					      rtex->htile_buffer, RADEON_USAGE_READWRITE,
					      RADEON_PRIO_HTILE);
		}

		radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
		radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);

		radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
		radeon_emit(cs, zb->db_depth_info);	/* R_02803C_DB_DEPTH_INFO */
		radeon_emit(cs, zb->db_z_info |		/* R_028040_DB_Z_INFO */
			    S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
		radeon_emit(cs, zb->db_stencil_info);	/* R_028044_DB_STENCIL_INFO */
		radeon_emit(cs, zb->db_depth_base);	/* R_028048_DB_Z_READ_BASE */
		radeon_emit(cs, zb->db_stencil_base);	/* R_02804C_DB_STENCIL_READ_BASE */
		radeon_emit(cs, zb->db_depth_base);	/* R_028050_DB_Z_WRITE_BASE */
		radeon_emit(cs, zb->db_stencil_base);	/* R_028054_DB_STENCIL_WRITE_BASE */
		radeon_emit(cs, zb->db_depth_size);	/* R_028058_DB_DEPTH_SIZE */
		radeon_emit(cs, zb->db_depth_slice);	/* R_02805C_DB_DEPTH_SLICE */

		radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
		radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */
		radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */

		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
	} else if (sctx->framebuffer.dirty_zsbuf) {
		/* No depth/stencil buffer bound: disable Z and stencil. */
		radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
	}

	/* Framebuffer dimensions. */
        /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
	radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));

	sctx->framebuffer.dirty_cbufs = 0;
	sctx->framebuffer.dirty_zsbuf = false;
}
2655
/* Emit MSAA sample locations, and on Polaris also the small primitive
 * filter control (which reads the sample locations even with MSAA off).
 */
static void si_emit_msaa_sample_locs(struct si_context *sctx,
				     struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	unsigned nr_samples = sctx->framebuffer.nr_samples;

	/* Smoothing (only possible with nr_samples == 1) uses the same
	 * sample locations as the MSAA it simulates.
	 */
	if (nr_samples <= 1 && sctx->smoothing_enabled)
		nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;

	/* On Polaris, the small primitive filter uses the sample locations
	 * even when MSAA is off, so we need to make sure they're set to 0.
	 */
	if (sctx->b.family >= CHIP_POLARIS10)
		nr_samples = MAX2(nr_samples, 1);

	/* Re-emit only when the sample count actually changed. */
	if (nr_samples >= 1 &&
	    (nr_samples != sctx->msaa_sample_locs.nr_samples)) {
		sctx->msaa_sample_locs.nr_samples = nr_samples;
		cayman_emit_msaa_sample_locs(cs, nr_samples);
	}

	if (sctx->b.family >= CHIP_POLARIS10) {
		struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
		unsigned small_prim_filter_cntl =
			S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
			S_028830_LINE_FILTER_DISABLE(1); /* line bug */

		/* The alternative of setting sample locations to 0 would
		 * require a DB flush to avoid Z errors, see
		 * https://bugs.freedesktop.org/show_bug.cgi?id=96908
		 */
		if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable)
			small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;

		radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
				       small_prim_filter_cntl);
	}
}
2697
/* Emit the MSAA configuration (incl. PA_SC_MODE_CNTL_1, reg 0x28A4C) via
 * the shared cayman helper.  Runs as the msaa_config atom.
 */
static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	unsigned num_tile_pipes = sctx->screen->b.info.num_tile_pipes;
	/* 33% faster rendering to linear color buffers */
	bool dst_is_linear = sctx->framebuffer.any_dst_linear;
	unsigned sc_mode_cntl_1 =
		S_028A4C_WALK_SIZE(dst_is_linear) |
		S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) |
		S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
		/* always 1: */
		S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
		S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
		S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
		S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
		S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
		S_028A4C_FORCE_EOV_REZ_ENABLE(1);

	/* 0 smooth samples disables the smoothing override in the helper. */
	cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
				sctx->ps_iter_samples,
				sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0,
				sc_mode_cntl_1);
}
2721
2722static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
2723{
2724	struct si_context *sctx = (struct si_context *)ctx;
2725
2726	if (sctx->ps_iter_samples == min_samples)
2727		return;
2728
2729	sctx->ps_iter_samples = min_samples;
2730	sctx->do_update_shaders = true;
2731
2732	if (sctx->framebuffer.nr_samples > 1)
2733		si_mark_atom_dirty(sctx, &sctx->msaa_config);
2734}
2735
2736/*
2737 * Samplers
2738 */
2739
2740/**
2741 * Build the sampler view descriptor for a buffer texture.
2742 * @param state 256-bit descriptor; only the high 128 bits are filled in
2743 */
2744void
2745si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
2746			  enum pipe_format format,
2747			  unsigned offset, unsigned size,
2748			  uint32_t *state)
2749{
2750	const struct util_format_description *desc;
2751	int first_non_void;
2752	unsigned stride;
2753	unsigned num_records;
2754	unsigned num_format, data_format;
2755
2756	desc = util_format_description(format);
2757	first_non_void = util_format_get_first_non_void_channel(format);
2758	stride = desc->block.bits / 8;
2759	num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void);
2760	data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void);
2761
2762	num_records = size / stride;
2763	num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride);
2764
2765	if (screen->b.chip_class >= VI)
2766		num_records *= stride;
2767
2768	state[4] = 0;
2769	state[5] = S_008F04_STRIDE(stride);
2770	state[6] = num_records;
2771	state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
2772		   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
2773		   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
2774		   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
2775		   S_008F0C_NUM_FORMAT(num_format) |
2776		   S_008F0C_DATA_FORMAT(data_format);
2777}
2778
2779/**
2780 * Build the sampler view descriptor for a texture.
2781 */
2782void
2783si_make_texture_descriptor(struct si_screen *screen,
2784			   struct r600_texture *tex,
2785			   bool sampler,
2786			   enum pipe_texture_target target,
2787			   enum pipe_format pipe_format,
2788			   const unsigned char state_swizzle[4],
2789			   unsigned first_level, unsigned last_level,
2790			   unsigned first_layer, unsigned last_layer,
2791			   unsigned width, unsigned height, unsigned depth,
2792			   uint32_t *state,
2793			   uint32_t *fmask_state)
2794{
2795	struct pipe_resource *res = &tex->resource.b.b;
2796	const struct util_format_description *desc;
2797	unsigned char swizzle[4];
2798	int first_non_void;
2799	unsigned num_format, data_format, type;
2800	uint64_t va;
2801
2802	desc = util_format_description(pipe_format);
2803
2804	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
2805		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
2806		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
2807
2808		switch (pipe_format) {
2809		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2810		case PIPE_FORMAT_X24S8_UINT:
2811		case PIPE_FORMAT_X32_S8X24_UINT:
2812		case PIPE_FORMAT_X8Z24_UNORM:
2813			util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
2814			break;
2815		default:
2816			util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
2817		}
2818	} else {
2819		util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
2820	}
2821
2822	first_non_void = util_format_get_first_non_void_channel(pipe_format);
2823
2824	switch (pipe_format) {
2825	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2826		num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2827		break;
2828	default:
2829		if (first_non_void < 0) {
2830			if (util_format_is_compressed(pipe_format)) {
2831				switch (pipe_format) {
2832				case PIPE_FORMAT_DXT1_SRGB:
2833				case PIPE_FORMAT_DXT1_SRGBA:
2834				case PIPE_FORMAT_DXT3_SRGBA:
2835				case PIPE_FORMAT_DXT5_SRGBA:
2836				case PIPE_FORMAT_BPTC_SRGBA:
2837				case PIPE_FORMAT_ETC2_SRGB8:
2838				case PIPE_FORMAT_ETC2_SRGB8A1:
2839				case PIPE_FORMAT_ETC2_SRGBA8:
2840					num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2841					break;
2842				case PIPE_FORMAT_RGTC1_SNORM:
2843				case PIPE_FORMAT_LATC1_SNORM:
2844				case PIPE_FORMAT_RGTC2_SNORM:
2845				case PIPE_FORMAT_LATC2_SNORM:
2846				case PIPE_FORMAT_ETC2_R11_SNORM:
2847				case PIPE_FORMAT_ETC2_RG11_SNORM:
2848				/* implies float, so use SNORM/UNORM to determine
2849				   whether data is signed or not */
2850				case PIPE_FORMAT_BPTC_RGB_FLOAT:
2851					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2852					break;
2853				default:
2854					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2855					break;
2856				}
2857			} else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
2858				num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2859			} else {
2860				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2861			}
2862		} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
2863			num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2864		} else {
2865			num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2866
2867			switch (desc->channel[first_non_void].type) {
2868			case UTIL_FORMAT_TYPE_FLOAT:
2869				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2870				break;
2871			case UTIL_FORMAT_TYPE_SIGNED:
2872				if (desc->channel[first_non_void].normalized)
2873					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2874				else if (desc->channel[first_non_void].pure_integer)
2875					num_format = V_008F14_IMG_NUM_FORMAT_SINT;
2876				else
2877					num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
2878				break;
2879			case UTIL_FORMAT_TYPE_UNSIGNED:
2880				if (desc->channel[first_non_void].normalized)
2881					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2882				else if (desc->channel[first_non_void].pure_integer)
2883					num_format = V_008F14_IMG_NUM_FORMAT_UINT;
2884				else
2885					num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
2886			}
2887		}
2888	}
2889
2890	data_format = si_translate_texformat(&screen->b.b, pipe_format, desc, first_non_void);
2891	if (data_format == ~0) {
2892		data_format = 0;
2893	}
2894
2895	if (!sampler &&
2896	    (res->target == PIPE_TEXTURE_CUBE ||
2897	     res->target == PIPE_TEXTURE_CUBE_ARRAY ||
2898	     res->target == PIPE_TEXTURE_3D)) {
2899		/* For the purpose of shader images, treat cube maps and 3D
2900		 * textures as 2D arrays. For 3D textures, the address
2901		 * calculations for mipmaps are different, so we rely on the
2902		 * caller to effectively disable mipmaps.
2903		 */
2904		type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
2905
2906		assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0));
2907	} else {
2908		type = si_tex_dim(res->target, target, res->nr_samples);
2909	}
2910
2911	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
2912	        height = 1;
2913		depth = res->array_size;
2914	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
2915		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
2916		if (sampler || res->target != PIPE_TEXTURE_3D)
2917			depth = res->array_size;
2918	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
2919		depth = res->array_size / 6;
2920
2921	state[0] = 0;
2922	state[1] = (S_008F14_DATA_FORMAT(data_format) |
2923		    S_008F14_NUM_FORMAT(num_format));
2924	state[2] = (S_008F18_WIDTH(width - 1) |
2925		    S_008F18_HEIGHT(height - 1) |
2926		    S_008F18_PERF_MOD(4));
2927	state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
2928		    S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
2929		    S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
2930		    S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
2931		    S_008F1C_BASE_LEVEL(res->nr_samples > 1 ?
2932					0 : first_level) |
2933		    S_008F1C_LAST_LEVEL(res->nr_samples > 1 ?
2934					util_logbase2(res->nr_samples) :
2935					last_level) |
2936		    S_008F1C_POW2_PAD(res->last_level > 0) |
2937		    S_008F1C_TYPE(type));
2938	state[4] = S_008F20_DEPTH(depth - 1);
2939	state[5] = (S_008F24_BASE_ARRAY(first_layer) |
2940		    S_008F24_LAST_ARRAY(last_layer));
2941	state[6] = 0;
2942	state[7] = 0;
2943
2944	if (tex->dcc_offset) {
2945		unsigned swap = r600_translate_colorswap(pipe_format, false);
2946
2947		state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
2948	} else {
2949		/* The last dword is unused by hw. The shader uses it to clear
2950		 * bits in the first dword of sampler state.
2951		 */
2952		if (screen->b.chip_class <= CIK && res->nr_samples <= 1) {
2953			if (first_level == last_level)
2954				state[7] = C_008F30_MAX_ANISO_RATIO;
2955			else
2956				state[7] = 0xffffffff;
2957		}
2958	}
2959
2960	/* Initialize the sampler view for FMASK. */
2961	if (tex->fmask.size) {
2962		uint32_t fmask_format;
2963
2964		va = tex->resource.gpu_address + tex->fmask.offset;
2965
2966		switch (res->nr_samples) {
2967		case 2:
2968			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
2969			break;
2970		case 4:
2971			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
2972			break;
2973		case 8:
2974			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
2975			break;
2976		default:
2977			assert(0);
2978			fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
2979		}
2980
2981		fmask_state[0] = va >> 8;
2982		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
2983				 S_008F14_DATA_FORMAT(fmask_format) |
2984				 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
2985		fmask_state[2] = S_008F18_WIDTH(width - 1) |
2986				 S_008F18_HEIGHT(height - 1);
2987		fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
2988				 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
2989				 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
2990				 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
2991				 S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index) |
2992				 S_008F1C_TYPE(si_tex_dim(res->target, target, 0));
2993		fmask_state[4] = S_008F20_DEPTH(depth - 1) |
2994				 S_008F20_PITCH(tex->fmask.pitch_in_pixels - 1);
2995		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) |
2996				 S_008F24_LAST_ARRAY(last_layer);
2997		fmask_state[6] = 0;
2998		fmask_state[7] = 0;
2999	}
3000}
3001
3002/**
3003 * Create a sampler view.
3004 *
3005 * @param ctx		context
3006 * @param texture	texture
3007 * @param state		sampler view template
3008 * @param width0	width0 override (for compressed textures as int)
3009 * @param height0	height0 override (for compressed textures as int)
3010 * @param force_level   set the base address to the level (for compressed textures)
3011 */
3012struct pipe_sampler_view *
3013si_create_sampler_view_custom(struct pipe_context *ctx,
3014			      struct pipe_resource *texture,
3015			      const struct pipe_sampler_view *state,
3016			      unsigned width0, unsigned height0,
3017			      unsigned force_level)
3018{
3019	struct si_context *sctx = (struct si_context*)ctx;
3020	struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
3021	struct r600_texture *tmp = (struct r600_texture*)texture;
3022	unsigned base_level, first_level, last_level;
3023	unsigned char state_swizzle[4];
3024	unsigned height, depth, width;
3025	unsigned last_layer = state->u.tex.last_layer;
3026	enum pipe_format pipe_format;
3027	const struct radeon_surf_level *surflevel;
3028
3029	if (!view)
3030		return NULL;
3031
3032	/* initialize base object */
3033	view->base = *state;
3034	view->base.texture = NULL;
3035	view->base.reference.count = 1;
3036	view->base.context = ctx;
3037
3038	assert(texture);
3039	pipe_resource_reference(&view->base.texture, texture);
3040
3041	if (state->format == PIPE_FORMAT_X24S8_UINT ||
3042	    state->format == PIPE_FORMAT_S8X24_UINT ||
3043	    state->format == PIPE_FORMAT_X32_S8X24_UINT ||
3044	    state->format == PIPE_FORMAT_S8_UINT)
3045		view->is_stencil_sampler = true;
3046
3047	/* Buffer resource. */
3048	if (texture->target == PIPE_BUFFER) {
3049		si_make_buffer_descriptor(sctx->screen,
3050					  (struct r600_resource *)texture,
3051					  state->format,
3052					  state->u.buf.offset,
3053					  state->u.buf.size,
3054					  view->state);
3055		return &view->base;
3056	}
3057
3058	state_swizzle[0] = state->swizzle_r;
3059	state_swizzle[1] = state->swizzle_g;
3060	state_swizzle[2] = state->swizzle_b;
3061	state_swizzle[3] = state->swizzle_a;
3062
3063	base_level = 0;
3064	first_level = state->u.tex.first_level;
3065	last_level = state->u.tex.last_level;
3066	width = width0;
3067	height = height0;
3068	depth = texture->depth0;
3069
3070	if (force_level) {
3071		assert(force_level == first_level &&
3072		       force_level == last_level);
3073		base_level = force_level;
3074		first_level = 0;
3075		last_level = 0;
3076		width = u_minify(width, force_level);
3077		height = u_minify(height, force_level);
3078		depth = u_minify(depth, force_level);
3079	}
3080
3081	/* This is not needed if state trackers set last_layer correctly. */
3082	if (state->target == PIPE_TEXTURE_1D ||
3083	    state->target == PIPE_TEXTURE_2D ||
3084	    state->target == PIPE_TEXTURE_RECT ||
3085	    state->target == PIPE_TEXTURE_CUBE)
3086		last_layer = state->u.tex.first_layer;
3087
3088	/* Texturing with separate depth and stencil. */
3089	pipe_format = state->format;
3090
3091	/* Depth/stencil texturing sometimes needs separate texture. */
3092	if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) {
3093		if (!tmp->flushed_depth_texture &&
3094		    !r600_init_flushed_depth_texture(ctx, texture, NULL)) {
3095			pipe_resource_reference(&view->base.texture, NULL);
3096			FREE(view);
3097			return NULL;
3098		}
3099
3100		assert(tmp->flushed_depth_texture);
3101
3102		/* Override format for the case where the flushed texture
3103		 * contains only Z or only S.
3104		 */
3105		if (tmp->flushed_depth_texture->resource.b.b.format != tmp->resource.b.b.format)
3106			pipe_format = tmp->flushed_depth_texture->resource.b.b.format;
3107
3108		tmp = tmp->flushed_depth_texture;
3109	}
3110
3111	surflevel = tmp->surface.level;
3112
3113	if (tmp->db_compatible) {
3114		if (!view->is_stencil_sampler)
3115			pipe_format = tmp->db_render_format;
3116
3117		switch (pipe_format) {
3118		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
3119			pipe_format = PIPE_FORMAT_Z32_FLOAT;
3120			break;
3121		case PIPE_FORMAT_X8Z24_UNORM:
3122		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
3123			/* Z24 is always stored like this for DB
3124			 * compatibility.
3125			 */
3126			pipe_format = PIPE_FORMAT_Z24X8_UNORM;
3127			break;
3128		case PIPE_FORMAT_X24S8_UINT:
3129		case PIPE_FORMAT_S8X24_UINT:
3130		case PIPE_FORMAT_X32_S8X24_UINT:
3131			pipe_format = PIPE_FORMAT_S8_UINT;
3132			surflevel = tmp->surface.stencil_level;
3133			break;
3134		default:;
3135		}
3136	}
3137
3138	vi_dcc_disable_if_incompatible_format(&sctx->b, texture,
3139					      state->u.tex.first_level,
3140					      state->format);
3141
3142	si_make_texture_descriptor(sctx->screen, tmp, true,
3143				   state->target, pipe_format, state_swizzle,
3144				   first_level, last_level,
3145				   state->u.tex.first_layer, last_layer,
3146				   width, height, depth,
3147				   view->state, view->fmask_state);
3148
3149	view->base_level_info = &surflevel[base_level];
3150	view->base_level = base_level;
3151	view->block_width = util_format_get_blockwidth(pipe_format);
3152	return &view->base;
3153}
3154
3155static struct pipe_sampler_view *
3156si_create_sampler_view(struct pipe_context *ctx,
3157		       struct pipe_resource *texture,
3158		       const struct pipe_sampler_view *state)
3159{
3160	return si_create_sampler_view_custom(ctx, texture, state,
3161					     texture ? texture->width0 : 0,
3162					     texture ? texture->height0 : 0, 0);
3163}
3164
3165static void si_sampler_view_destroy(struct pipe_context *ctx,
3166				    struct pipe_sampler_view *state)
3167{
3168	struct si_sampler_view *view = (struct si_sampler_view *)state;
3169
3170	pipe_resource_reference(&state->texture, NULL);
3171	FREE(view);
3172}
3173
3174static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
3175{
3176	return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
3177	       wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
3178	       (linear_filter &&
3179	        (wrap == PIPE_TEX_WRAP_CLAMP ||
3180		 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
3181}
3182
3183static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
3184{
3185	bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
3186			     state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
3187
3188	return (state->border_color.ui[0] || state->border_color.ui[1] ||
3189		state->border_color.ui[2] || state->border_color.ui[3]) &&
3190	       (wrap_mode_uses_border_color(state->wrap_s, linear_filter) ||
3191		wrap_mode_uses_border_color(state->wrap_t, linear_filter) ||
3192		wrap_mode_uses_border_color(state->wrap_r, linear_filter));
3193}
3194
/* pipe_context::create_sampler_state — translate a Gallium sampler state
 * into the 4-dword SI sampler descriptor.  Non-trivial border colors are
 * deduplicated into the per-context border color table and referenced by
 * index.
 */
static void *si_create_sampler_state(struct pipe_context *ctx,
				     const struct pipe_sampler_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct r600_common_screen *rscreen = sctx->b.screen;
	struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
	unsigned border_color_type, border_color_index = 0;
	/* force_aniso < 0 means "no override" (debug knob). */
	unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso
						       : state->max_anisotropy;
	unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso);

	if (!rstate) {
		return NULL;
	}

	/* Prefer one of the three built-in border color types; fall back to
	 * the border color table only for arbitrary colors. */
	if (!sampler_state_needs_border_color(state))
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
	else if (state->border_color.f[0] == 0 &&
		 state->border_color.f[1] == 0 &&
		 state->border_color.f[2] == 0 &&
		 state->border_color.f[3] == 0)
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
	else if (state->border_color.f[0] == 0 &&
		 state->border_color.f[1] == 0 &&
		 state->border_color.f[2] == 0 &&
		 state->border_color.f[3] == 1)
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
	else if (state->border_color.f[0] == 1 &&
		 state->border_color.f[1] == 1 &&
		 state->border_color.f[2] == 1 &&
		 state->border_color.f[3] == 1)
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
	else {
		int i;

		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;

		/* Check if the border has been uploaded already. */
		for (i = 0; i < sctx->border_color_count; i++)
			if (memcmp(&sctx->border_color_table[i], &state->border_color,
				   sizeof(state->border_color)) == 0)
				break;

		if (i >= SI_MAX_BORDER_COLORS) {
			/* Getting 4096 unique border colors is very unlikely. */
			fprintf(stderr, "radeonsi: The border color table is full. "
				"Any new border colors will be just black. "
				"Please file a bug.\n");
			border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
		} else {
			if (i == sctx->border_color_count) {
				/* Upload a new border color. */
				memcpy(&sctx->border_color_table[i], &state->border_color,
				       sizeof(state->border_color));
				util_memcpy_cpu_to_le32(&sctx->border_color_map[i],
							&state->border_color,
							sizeof(state->border_color));
				sctx->border_color_count++;
			}

			border_color_index = i;
		}
	}

#ifdef DEBUG
	rstate->magic = SI_SAMPLER_STATE_MAGIC;
#endif
	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
			  S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
			  S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
			  S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
			  S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
			  S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
			  S_008F30_ANISO_BIAS(max_aniso_ratio) |
			  S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
			  S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI));
	/* LODs are unsigned 4.8 fixed point; the bias is signed 5.8. */
	rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
			  S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) |
			  S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
	rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
			  S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) |
			  S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) |
			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
			  S_008F38_MIP_POINT_PRECLAMP(1) |
			  S_008F38_DISABLE_LSB_CEIL(1) |
			  S_008F38_FILTER_PREC_FIX(1) |
			  S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI));
	rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
			 S_008F3C_BORDER_COLOR_TYPE(border_color_type);
	return rstate;
}
3287
3288static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
3289{
3290	struct si_context *sctx = (struct si_context *)ctx;
3291
3292	if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask)
3293		return;
3294
3295	sctx->sample_mask.sample_mask = sample_mask;
3296	si_mark_atom_dirty(sctx, &sctx->sample_mask.atom);
3297}
3298
3299static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom)
3300{
3301	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
3302	unsigned mask = sctx->sample_mask.sample_mask;
3303
3304	/* Needed for line and polygon smoothing as well as for the Polaris
3305	 * small primitive filter. We expect the state tracker to take care of
3306	 * this for us.
3307	 */
3308	assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||
3309	       (mask & 1 && sctx->blitter->running));
3310
3311	radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
3312	radeon_emit(cs, mask | (mask << 16));
3313	radeon_emit(cs, mask | (mask << 16));
3314}
3315
/* pipe_context::delete_sampler_state — free the descriptor.  Debug builds
 * verify (and then clear) the magic value to catch double frees and
 * foreign pointers.
 */
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
{
#ifdef DEBUG
	struct si_sampler_state *rstate = state;

	assert(rstate->magic == SI_SAMPLER_STATE_MAGIC);
	rstate->magic = 0;
#endif
	free(state);
}
3326
3327/*
3328 * Vertex elements & buffers
3329 */
3330
/* pipe_context::create_vertex_elements_state — precompute the vertex
 * buffer descriptor dword 3 (dst_sel/num_format/data_format), the element
 * byte size, and the per-element shader fetch workarounds.
 */
static void *si_create_vertex_elements(struct pipe_context *ctx,
				       unsigned count,
				       const struct pipe_vertex_element *elements)
{
	struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
	int i;

	assert(count <= SI_MAX_ATTRIBS);
	if (!v)
		return NULL;

	v->count = count;
	for (i = 0; i < count; ++i) {
		const struct util_format_description *desc;
		unsigned data_format, num_format;
		int first_non_void;

		desc = util_format_description(elements[i].src_format);
		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);

		v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
				   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
				   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
				   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
				   S_008F0C_NUM_FORMAT(num_format) |
				   S_008F0C_DATA_FORMAT(data_format);
		v->format_size[i] = desc->block.bits / 8;

		/* The hardware always treats the 2-bit alpha channel as
		 * unsigned, so a shader workaround is needed.
		 */
		/* fix_fetch packs a 2-bit code per element. */
		if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) {
			if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
				v->fix_fetch |= SI_FIX_FETCH_A2_SNORM << (2 * i);
			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
				v->fix_fetch |= SI_FIX_FETCH_A2_SSCALED << (2 * i);
			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
				/* This isn't actually used in OpenGL. */
				v->fix_fetch |= SI_FIX_FETCH_A2_SINT << (2 * i);
			}
		}

		/* We work around the fact that 8_8_8 and 16_16_16 data formats
		 * do not exist by using the corresponding 4-component formats.
		 * This requires a fixup of the descriptor for bounds checks.
		 */
		/* Stores 1 for 8_8_8 or 2 for 16_16_16 in 2 bits per element. */
		if (desc->block.bits == 3 * 8 ||
		    desc->block.bits == 3 * 16) {
			v->fix_size3 |= (desc->block.bits / 24) << (2 * i);
		}
	}
	memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);

	return v;
}
3388
3389static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
3390{
3391	struct si_context *sctx = (struct si_context *)ctx;
3392	struct si_vertex_element *v = (struct si_vertex_element*)state;
3393
3394	sctx->vertex_elements = v;
3395	sctx->vertex_buffers_dirty = true;
3396	sctx->do_update_shaders = true;
3397}
3398
3399static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
3400{
3401	struct si_context *sctx = (struct si_context *)ctx;
3402
3403	if (sctx->vertex_elements == state)
3404		sctx->vertex_elements = NULL;
3405	FREE(state);
3406}
3407
3408static void si_set_vertex_buffers(struct pipe_context *ctx,
3409				  unsigned start_slot, unsigned count,
3410				  const struct pipe_vertex_buffer *buffers)
3411{
3412	struct si_context *sctx = (struct si_context *)ctx;
3413	struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
3414	int i;
3415
3416	assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer));
3417
3418	if (buffers) {
3419		for (i = 0; i < count; i++) {
3420			const struct pipe_vertex_buffer *src = buffers + i;
3421			struct pipe_vertex_buffer *dsti = dst + i;
3422			struct pipe_resource *buf = src->buffer;
3423
3424			pipe_resource_reference(&dsti->buffer, buf);
3425			dsti->buffer_offset = src->buffer_offset;
3426			dsti->stride = src->stride;
3427			r600_context_add_resource_size(ctx, buf);
3428			if (buf)
3429				r600_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER;
3430		}
3431	} else {
3432		for (i = 0; i < count; i++) {
3433			pipe_resource_reference(&dst[i].buffer, NULL);
3434		}
3435	}
3436	sctx->vertex_buffers_dirty = true;
3437}
3438
3439static void si_set_index_buffer(struct pipe_context *ctx,
3440				const struct pipe_index_buffer *ib)
3441{
3442	struct si_context *sctx = (struct si_context *)ctx;
3443
3444	if (ib) {
3445		struct pipe_resource *buf = ib->buffer;
3446
3447		pipe_resource_reference(&sctx->index_buffer.buffer, buf);
3448	        memcpy(&sctx->index_buffer, ib, sizeof(*ib));
3449		r600_context_add_resource_size(ctx, buf);
3450		if (buf)
3451			r600_resource(buf)->bind_history |= PIPE_BIND_INDEX_BUFFER;
3452	} else {
3453		pipe_resource_reference(&sctx->index_buffer.buffer, NULL);
3454	}
3455}
3456
3457/*
3458 * Misc
3459 */
3460
3461static void si_set_tess_state(struct pipe_context *ctx,
3462			      const float default_outer_level[4],
3463			      const float default_inner_level[2])
3464{
3465	struct si_context *sctx = (struct si_context *)ctx;
3466	struct pipe_constant_buffer cb;
3467	float array[8];
3468
3469	memcpy(array, default_outer_level, sizeof(float) * 4);
3470	memcpy(array+4, default_inner_level, sizeof(float) * 2);
3471
3472	cb.buffer = NULL;
3473	cb.user_buffer = NULL;
3474	cb.buffer_size = sizeof(array);
3475
3476	si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer,
3477			       (void*)array, sizeof(array),
3478			       &cb.buffer_offset);
3479
3480	si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
3481	pipe_resource_reference(&cb.buffer, NULL);
3482}
3483
3484static void si_texture_barrier(struct pipe_context *ctx)
3485{
3486	struct si_context *sctx = (struct si_context *)ctx;
3487
3488	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
3489			 SI_CONTEXT_INV_GLOBAL_L2 |
3490			 SI_CONTEXT_FLUSH_AND_INV_CB |
3491			 SI_CONTEXT_CS_PARTIAL_FLUSH;
3492}
3493
3494/* This only ensures coherency for shader image/buffer stores. */
3495static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
3496{
3497	struct si_context *sctx = (struct si_context *)ctx;
3498
3499	/* Subsequent commands must wait for all shader invocations to
3500	 * complete. */
3501	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
3502	                 SI_CONTEXT_CS_PARTIAL_FLUSH;
3503
3504	if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
3505		sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
3506				 SI_CONTEXT_INV_VMEM_L1;
3507
3508	if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
3509		     PIPE_BARRIER_SHADER_BUFFER |
3510		     PIPE_BARRIER_TEXTURE |
3511		     PIPE_BARRIER_IMAGE |
3512		     PIPE_BARRIER_STREAMOUT_BUFFER |
3513		     PIPE_BARRIER_GLOBAL_BUFFER)) {
3514		/* As far as I can tell, L1 contents are written back to L2
3515		 * automatically at end of shader, but the contents of other
3516		 * L1 caches might still be stale. */
3517		sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
3518	}
3519
3520	if (flags & PIPE_BARRIER_INDEX_BUFFER) {
3521		/* Indices are read through TC L2 since VI.
3522		 * L1 isn't used.
3523		 */
3524		if (sctx->screen->b.chip_class <= CIK)
3525			sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
3526	}
3527
3528	if (flags & PIPE_BARRIER_FRAMEBUFFER)
3529		sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
3530
3531	if (flags & (PIPE_BARRIER_FRAMEBUFFER |
3532		     PIPE_BARRIER_INDIRECT_BUFFER))
3533		sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
3534}
3535
3536static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
3537{
3538	struct pipe_blend_state blend;
3539
3540	memset(&blend, 0, sizeof(blend));
3541	blend.independent_blend_enable = true;
3542	blend.rt[0].colormask = 0xf;
3543	return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
3544}
3545
/* r600 "need gfx CS space" hook.  num_dw and include_draw_vbo are ignored;
 * si_need_cs_space() reserves its own fixed amount.
 */
static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
				 bool include_draw_vbo)
{
	struct si_context *sctx = (struct si_context *)ctx;

	si_need_cs_space(sctx);
}
3551
3552static void si_init_config(struct si_context *sctx);
3553
/* Fill in the pipe_context state hooks owned by this file, register
 * radeonsi's state atoms, create the internal custom blend/DSA states,
 * and build the static init_config PM4 state.  Called once at context
 * creation.
 */
void si_init_state_functions(struct si_context *sctx)
{
	/* Atoms owned by ../radeon (external); their IDs come from the
	 * address of their slot in sctx->atoms (see si_init_external_atom). */
	si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond);
	si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin);
	si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable);
	si_init_external_atom(sctx, &sctx->b.scissors.atom, &sctx->atoms.s.scissors);
	si_init_external_atom(sctx, &sctx->b.viewports.atom, &sctx->atoms.s.viewports);

	/* Atoms owned by radeonsi, each paired with its emit function. */
	si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
	si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
	si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
	si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
	si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state);
	si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
	si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
	si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);

	/* Blend state. */
	sctx->b.b.create_blend_state = si_create_blend_state;
	sctx->b.b.bind_blend_state = si_bind_blend_state;
	sctx->b.b.delete_blend_state = si_delete_blend_state;
	sctx->b.b.set_blend_color = si_set_blend_color;

	/* Rasterizer state. */
	sctx->b.b.create_rasterizer_state = si_create_rs_state;
	sctx->b.b.bind_rasterizer_state = si_bind_rs_state;
	sctx->b.b.delete_rasterizer_state = si_delete_rs_state;

	/* Depth/stencil/alpha state. */
	sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state;
	sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
	sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state;

	/* Internal states used by blits and decompression passes. */
	sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
	sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
	sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
	sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
	sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS);

	sctx->b.b.set_clip_state = si_set_clip_state;
	sctx->b.b.set_stencil_ref = si_set_stencil_ref;

	sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
	sctx->b.b.get_sample_position = cayman_get_sample_position;

	/* Samplers and sampler views. */
	sctx->b.b.create_sampler_state = si_create_sampler_state;
	sctx->b.b.delete_sampler_state = si_delete_sampler_state;

	sctx->b.b.create_sampler_view = si_create_sampler_view;
	sctx->b.b.sampler_view_destroy = si_sampler_view_destroy;

	sctx->b.b.set_sample_mask = si_set_sample_mask;

	/* Vertex fetch state. */
	sctx->b.b.create_vertex_elements_state = si_create_vertex_elements;
	sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements;
	sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element;
	sctx->b.b.set_vertex_buffers = si_set_vertex_buffers;
	sctx->b.b.set_index_buffer = si_set_index_buffer;

	sctx->b.b.texture_barrier = si_texture_barrier;
	sctx->b.b.memory_barrier = si_memory_barrier;
	sctx->b.b.set_min_samples = si_set_min_samples;
	sctx->b.b.set_tess_state = si_set_tess_state;

	/* Query-related hooks (pipe and r600_common_context level). */
	sctx->b.b.set_active_query_state = si_set_active_query_state;
	sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
	sctx->b.save_qbo_state = si_save_qbo_state;
	sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;

	sctx->b.b.draw_vbo = si_draw_vbo;

	si_init_config(sctx);
}
3626
3627static uint32_t si_get_bo_metadata_word1(struct r600_common_screen *rscreen)
3628{
3629	return (ATI_VENDOR_ID << 16) | rscreen->info.pci_id;
3630}
3631
/* Fill md->metadata with driver-private ("opaque") metadata describing
 * rtex, so that a process importing the BO can reconstruct the texture
 * state (including the DCC offset).  The layout is documented inline
 * below ("Metadata image format version 1").
 */
static void si_query_opaque_metadata(struct r600_common_screen *rscreen,
				     struct r600_texture *rtex,
			             struct radeon_bo_metadata *md)
{
	struct si_screen *sscreen = (struct si_screen*)rscreen;
	struct pipe_resource *res = &rtex->resource.b.b;
	/* Identity channel swizzle for the descriptor of the whole resource. */
	static const unsigned char swizzle[] = {
		PIPE_SWIZZLE_X,
		PIPE_SWIZZLE_Y,
		PIPE_SWIZZLE_Z,
		PIPE_SWIZZLE_W
	};
	uint32_t desc[8], i;
	bool is_array = util_resource_is_array_texture(res);

	/* DRM 2.x.x doesn't support this. */
	if (rscreen->info.drm_major != 3)
		return;

	/* Exported textures must have DCC in the main buffer and no FMASK. */
	assert(rtex->dcc_separate_buffer == NULL);
	assert(rtex->fmask.size == 0);

	/* Metadata image format format version 1:
	 * [0] = 1 (metadata format identifier)
	 * [1] = (VENDOR_ID << 16) | PCI_ID
	 * [2:9] = image descriptor for the whole resource
	 *         [2] is always 0, because the base address is cleared
	 *         [9] is the DCC offset bits [39:8] from the beginning of
	 *             the buffer
	 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
	 */

	md->metadata[0] = 1; /* metadata image format version 1 */

	/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
	md->metadata[1] = si_get_bo_metadata_word1(rscreen);

	/* Build a descriptor covering all mip levels and array layers. */
	si_make_texture_descriptor(sscreen, rtex, true,
				   res->target, res->format,
				   swizzle, 0, res->last_level, 0,
				   is_array ? res->array_size - 1 : 0,
				   res->width0, res->height0, res->depth0,
				   desc, NULL);

	si_set_mutable_tex_desc_fields(rtex, &rtex->surface.level[0], 0, 0,
				       rtex->surface.blk_w, false, desc);

	/* Clear the base address and set the relative DCC offset. */
	desc[0] = 0;
	desc[1] &= C_008F14_BASE_ADDRESS_HI;
	desc[7] = rtex->dcc_offset >> 8;

	/* Dwords [2:9] contain the image descriptor. */
	memcpy(&md->metadata[2], desc, sizeof(desc));

	/* Dwords [10:..] contain the mipmap level offsets. */
	for (i = 0; i <= res->last_level; i++)
		md->metadata[10+i] = rtex->surface.level[i].offset >> 8;

	/* Total size in bytes: dwords [0:9] plus one per mip level. */
	md->size_metadata = (11 + res->last_level) * 4;
}
3693
3694static void si_apply_opaque_metadata(struct r600_common_screen *rscreen,
3695				     struct r600_texture *rtex,
3696			             struct radeon_bo_metadata *md)
3697{
3698	uint32_t *desc = &md->metadata[2];
3699
3700	if (rscreen->chip_class < VI)
3701		return;
3702
3703	/* Return if DCC is enabled. The texture should be set up with it
3704	 * already.
3705	 */
3706	if (md->size_metadata >= 11 * 4 &&
3707	    md->metadata[0] != 0 &&
3708	    md->metadata[1] == si_get_bo_metadata_word1(rscreen) &&
3709	    G_008F28_COMPRESSION_EN(desc[6])) {
3710		assert(rtex->dcc_offset == ((uint64_t)desc[7] << 8));
3711		return;
3712	}
3713
3714	/* Disable DCC. These are always set by texture_from_handle and must
3715	 * be cleared here.
3716	 */
3717	rtex->dcc_offset = 0;
3718}
3719
3720void si_init_screen_state_functions(struct si_screen *sscreen)
3721{
3722	sscreen->b.b.is_format_supported = si_is_format_supported;
3723	sscreen->b.query_opaque_metadata = si_query_opaque_metadata;
3724	sscreen->b.apply_opaque_metadata = si_apply_opaque_metadata;
3725}
3726
/* Program PA_SC_RASTER_CONFIG (and PA_SC_RASTER_CONFIG_1 on CIK+) per
 * shader engine when some render backends are harvested (disabled),
 * remapping SE/packer/RB assignments away from the disabled backends.
 * raster_config/raster_config_1 are the full-chip default values chosen
 * by si_init_config for this family.
 */
static void
si_write_harvested_raster_configs(struct si_context *sctx,
				  struct si_pm4_state *pm4,
				  unsigned raster_config,
				  unsigned raster_config_1)
{
	unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
	unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
	unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
	unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
	unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice the enabled-RB mask into one contiguous group per SE. */
	se_mask[0] = ((1 << rb_per_se) - 1);
	se_mask[1] = (se_mask[0] << rb_per_se);
	se_mask[2] = (se_mask[1] << rb_per_se);
	se_mask[3] = (se_mask[2] << rb_per_se);

	se_mask[0] &= rb_mask;
	se_mask[1] &= rb_mask;
	se_mask[2] &= rb_mask;
	se_mask[3] &= rb_mask;

	assert(num_se == 1 || num_se == 2 || num_se == 4);
	assert(sh_per_se == 1 || sh_per_se == 2);
	assert(rb_per_pkr == 1 || rb_per_pkr == 2);

	/* XXX: I can't figure out what the *_XSEL and *_YSEL
	 * fields are for, so I'm leaving them as their default
	 * values. */

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* If either SE of this SE pair is fully harvested, remap
		 * SE_MAP to the surviving one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= C_028350_SE_MAP;

			if (!se_mask[idx]) {
				raster_config_se |=
					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |=
					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Remap PKR_MAP if one of the two packers has no enabled RBs. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= C_028350_PKR_MAP;

			if (!pkr0_mask) {
				raster_config_se |=
					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |=
					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Remap the per-packer RB maps when one RB of a pair is
		 * harvested. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= C_028350_RB_MAP_PKR0;

				if (!rb0_mask) {
					raster_config_se |=
						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= C_028350_RB_MAP_PKR1;

					if (!rb0_mask) {
						raster_config_se |=
							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on SI and CI+ */
		if (sctx->b.chip_class < CIK)
			si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
				       SE_INDEX(se) | SH_BROADCAST_WRITES |
				       INSTANCE_BROADCAST_WRITES);
		else
			si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
				       S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
				       S_030800_INSTANCE_BROADCAST_WRITES(1));
		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
	}

	/* Restore broadcast writes after the per-SE register writes above. */
	/* GRBM_GFX_INDEX has a different offset on SI and CI+ */
	if (sctx->b.chip_class < CIK)
		si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
			       SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
			       INSTANCE_BROADCAST_WRITES);
	else {
		si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
			       S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
			       S_030800_INSTANCE_BROADCAST_WRITES(1));

		/* Remap SE_PAIR_MAP if a whole SE pair is harvested
		 * (PA_SC_RASTER_CONFIG_1 exists only on CIK+). */
		if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
		                     (!se_mask[2] && !se_mask[3]))) {
			raster_config_1 &= C_028354_SE_PAIR_MAP;

			if (!se_mask[0] && !se_mask[1]) {
				raster_config_1 |=
					S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
			} else {
				raster_config_1 |=
					S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
			}
		}

		si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
	}
}
3867
/* Build the static "init_config" PM4 state that is emitted at the start
 * of every gfx IB: context control, default VGT/PA/DB/SPI registers,
 * the per-family PA_SC_RASTER_CONFIG values (harvested or not), and the
 * border color buffer address.  On CALLOC failure the function returns
 * silently and sctx->init_config stays unset.
 */
static void si_init_config(struct si_context *sctx)
{
	struct si_screen *sscreen = sctx->screen;
	unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
	unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
	unsigned raster_config, raster_config_1;
	uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);

	if (!pm4)
		return;

	/* Enable loading and shadowing of context registers. */
	si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1));
	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
	si_pm4_cmd_end(pm4, false);

	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
	si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));

	/* FIXME calculate these values somehow ??? */
	si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
	si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
	si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);

	si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
	si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);

	si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
	si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
	if (sctx->b.chip_class < CIK)
		si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
			       S_008A14_CLIP_VTX_REORDER_ENA(1));

	si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
	si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);

	si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);

	/* Per-family default raster configs for the fully-enabled chip;
	 * harvested parts are handled below. */
	switch (sctx->screen->b.family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		raster_config = 0x2a00126a;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_VERDE:
		raster_config = 0x0000124a;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_OLAND:
		raster_config = 0x00000082;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_HAINAN:
		raster_config = 0x00000000;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_BONAIRE:
		raster_config = 0x16000012;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_HAWAII:
		raster_config = 0x3a00161a;
		raster_config_1 = 0x0000002e;
		break;
	case CHIP_FIJI:
		if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) {
			/* old kernels with old tiling config */
			raster_config = 0x16000012;
			raster_config_1 = 0x0000002a;
		} else {
			raster_config = 0x3a00161a;
			raster_config_1 = 0x0000002e;
		}
		break;
	case CHIP_POLARIS10:
		raster_config = 0x16000012;
		raster_config_1 = 0x0000002a;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		raster_config = 0x16000012;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_TONGA:
		raster_config = 0x16000012;
		raster_config_1 = 0x0000002a;
		break;
	case CHIP_ICELAND:
		if (num_rb == 1)
			raster_config = 0x00000000;
		else
			raster_config = 0x00000002;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_CARRIZO:
		raster_config = 0x00000002;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_KAVERI:
		/* KV should be 0x00000002, but that causes problems with radeon */
		raster_config = 0x00000000; /* 0x00000002 */
		raster_config_1 = 0x00000000;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	case CHIP_STONEY:
		raster_config = 0x00000000;
		raster_config_1 = 0x00000000;
		break;
	default:
		fprintf(stderr,
			"radeonsi: Unknown GPU, using 0 for raster_config\n");
		raster_config = 0x00000000;
		raster_config_1 = 0x00000000;
		break;
	}

	/* Always use the default config when all backends are enabled
	 * (or when we failed to determine the enabled backends).
	 */
	if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
			       raster_config);
		if (sctx->b.chip_class >= CIK)
			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
				       raster_config_1);
	} else {
		si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
	}

	/* Default (maximum) scissors; the dynamic scissor state is emitted
	 * by the scissors atom. */
	si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
	si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
	si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
		       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
	si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
	si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
		       S_028034_BR_X(16384) | S_028034_BR_Y(16384));

	si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
		       S_028230_ER_TRI(0xA) |
		       S_028230_ER_POINT(0xA) |
		       S_028230_ER_RECT(0xA) |
		       /* Required by DX10_DIAMOND_TEST_ENA: */
		       S_028230_ER_LINE_LR(0x1A) |
		       S_028230_ER_LINE_RL(0x26) |
		       S_028230_ER_LINE_TB(0xA) |
		       S_028230_ER_LINE_BT(0xA));
	/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
	si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
	si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
	si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
	si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
	si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
	si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);

	si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
	si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
	si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);

	if (sctx->b.chip_class >= CIK) {
		/* If this is 0, Bonaire can hang even if GS isn't being used.
		 * Other chips are unaffected. These are suboptimal values,
		 * but we don't use on-chip GS.
		 */
		si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
			       S_028A44_ES_VERTS_PER_SUBGRP(64) |
			       S_028A44_GS_PRIMS_PER_SUBGRP(4));

		/* Enable all CUs for LS/ES/GS; HS uses 0 (hw default meaning
		 * per this register's encoding — NOTE(review): confirm). */
		si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
		si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
		si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
		si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));

		if (sscreen->b.info.num_good_compute_units /
		    (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) {
			/* Too few available compute units per SH. Disallowing
			 * VS to run on CU0 could hurt us more than late VS
			 * allocation would help.
			 *
			 * LATE_ALLOC_VS = 2 is the highest safe number.
			 */
			si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
			si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
		} else {
			/* Set LATE_ALLOC_VS == 31. It should be less than
			 * the number of scratch waves. Limitations:
			 * - VS can't execute on CU0.
			 * - If HS writes outputs to LDS, LS can't execute on CU0.
			 */
			si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
			si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
		}

		si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
	}

	if (sctx->b.chip_class >= VI) {
		unsigned vgt_tess_distribution;

		si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
			       S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
			       S_028424_OVERWRITE_COMBINER_WATERMARK(4));
		if (sctx->b.family < CHIP_POLARIS10)
			si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);

		vgt_tess_distribution =
			S_028B50_ACCUM_ISOLINE(32) |
			S_028B50_ACCUM_TRI(11) |
			S_028B50_ACCUM_QUAD(11) |
			S_028B50_DONUT_SPLIT(16);

		/* Testing with Unigine Heaven extreme tesselation yielded best results
		 * with TRAP_SPLIT = 3.
		 */
		if (sctx->b.family == CHIP_FIJI ||
		    sctx->b.family >= CHIP_POLARIS10)
			vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);

		si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
	} else {
		si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
	}

	if (sctx->b.family == CHIP_STONEY)
		si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);

	/* Border color buffer address (high bits only exist on CIK+). */
	si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
	if (sctx->b.chip_class >= CIK)
		si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
	si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
		      RADEON_PRIO_BORDER_COLORS);

	si_pm4_upload_indirect_buffer(sctx, pm4);
	sctx->init_config = pm4;
}
4107