/* si_state.c revision b9b9540a604883e620de872537b89d47d4bceb68 */
1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *      Christian König <christian.koenig@amd.com>
25 */
26
27#include "si_pipe.h"
28#include "sid.h"
29#include "radeon/r600_cs.h"
30#include "radeon/r600_query.h"
31
32#include "util/u_dual_blend.h"
33#include "util/u_format.h"
34#include "util/u_format_s3tc.h"
35#include "util/u_memory.h"
36#include "util/u_resource.h"
37
38/* Initialize an external atom (owned by ../radeon). */
39static void
40si_init_external_atom(struct si_context *sctx, struct r600_atom *atom,
41		      struct r600_atom **list_elem)
42{
43	atom->id = list_elem - sctx->atoms.array + 1;
44	*list_elem = atom;
45}
46
47/* Initialize an atom owned by radeonsi.  */
48void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
49		  struct r600_atom **list_elem,
50		  void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
51{
52	atom->emit = (void*)emit_func;
53	atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */
54	*list_elem = atom;
55}
56
57static unsigned si_map_swizzle(unsigned swizzle)
58{
59	switch (swizzle) {
60	case PIPE_SWIZZLE_Y:
61		return V_008F0C_SQ_SEL_Y;
62	case PIPE_SWIZZLE_Z:
63		return V_008F0C_SQ_SEL_Z;
64	case PIPE_SWIZZLE_W:
65		return V_008F0C_SQ_SEL_W;
66	case PIPE_SWIZZLE_0:
67		return V_008F0C_SQ_SEL_0;
68	case PIPE_SWIZZLE_1:
69		return V_008F0C_SQ_SEL_1;
70	default: /* PIPE_SWIZZLE_X */
71		return V_008F0C_SQ_SEL_X;
72	}
73}
74
/* Convert a float to unsigned fixed point with frac_bits fractional bits.
 * The float->integer conversion truncates toward zero. */
static uint32_t S_FIXED(float value, uint32_t frac_bits)
{
	return (uint32_t)(value * (float)(1u << frac_bits));
}
79
/* Convert to 12.4 unsigned fixed point, clamping to [0, 0xffff]. */
static unsigned si_pack_float_12p4(float x)
{
	if (x <= 0)
		return 0;
	if (x >= 4096)
		return 0xffff;
	return x * 16;
}
86
87/*
88 * Inferred framebuffer and blender state.
89 *
90 * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending
91 * if there is not enough PS outputs.
92 */
93static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom)
94{
95	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
96	struct si_state_blend *blend = sctx->queued.named.blend;
97	/* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers,
98	 * but you never know. */
99	uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit;
100	unsigned i;
101
102	if (blend)
103		cb_target_mask &= blend->cb_target_mask;
104
105	/* Avoid a hang that happens when dual source blending is enabled
106	 * but there is not enough color outputs. This is undefined behavior,
107	 * so disable color writes completely.
108	 *
109	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
110	 */
111	if (blend && blend->dual_src_blend &&
112	    sctx->ps_shader.cso &&
113	    (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
114		cb_target_mask = 0;
115
116	radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask);
117
118	/* STONEY-specific register settings. */
119	if (sctx->b.family == CHIP_STONEY) {
120		unsigned spi_shader_col_format =
121			sctx->ps_shader.cso ?
122			sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0;
123		unsigned sx_ps_downconvert = 0;
124		unsigned sx_blend_opt_epsilon = 0;
125		unsigned sx_blend_opt_control = 0;
126
127		for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
128			struct r600_surface *surf =
129				(struct r600_surface*)sctx->framebuffer.state.cbufs[i];
130			unsigned format, swap, spi_format, colormask;
131			bool has_alpha, has_rgb;
132
133			if (!surf)
134				continue;
135
136			format = G_028C70_FORMAT(surf->cb_color_info);
137			swap = G_028C70_COMP_SWAP(surf->cb_color_info);
138			spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
139			colormask = (cb_target_mask >> (i * 4)) & 0xf;
140
141			/* Set if RGB and A are present. */
142			has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib);
143
144			if (format == V_028C70_COLOR_8 ||
145			    format == V_028C70_COLOR_16 ||
146			    format == V_028C70_COLOR_32)
147				has_rgb = !has_alpha;
148			else
149				has_rgb = true;
150
151			/* Check the colormask and export format. */
152			if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A)))
153				has_rgb = false;
154			if (!(colormask & PIPE_MASK_A))
155				has_alpha = false;
156
157			if (spi_format == V_028714_SPI_SHADER_ZERO) {
158				has_rgb = false;
159				has_alpha = false;
160			}
161
162			/* Disable value checking for disabled channels. */
163			if (!has_rgb)
164				sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
165			if (!has_alpha)
166				sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
167
168			/* Enable down-conversion for 32bpp and smaller formats. */
169			switch (format) {
170			case V_028C70_COLOR_8:
171			case V_028C70_COLOR_8_8:
172			case V_028C70_COLOR_8_8_8_8:
173				/* For 1 and 2-channel formats, use the superset thereof. */
174				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
175				    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
176				    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
177					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
178					sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
179				}
180				break;
181
182			case V_028C70_COLOR_5_6_5:
183				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
184					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
185					sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
186				}
187				break;
188
189			case V_028C70_COLOR_1_5_5_5:
190				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
191					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
192					sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
193				}
194				break;
195
196			case V_028C70_COLOR_4_4_4_4:
197				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
198					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
199					sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
200				}
201				break;
202
203			case V_028C70_COLOR_32:
204				if (swap == V_0280A0_SWAP_STD &&
205				    spi_format == V_028714_SPI_SHADER_32_R)
206					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
207				else if (swap == V_0280A0_SWAP_ALT_REV &&
208					 spi_format == V_028714_SPI_SHADER_32_AR)
209					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
210				break;
211
212			case V_028C70_COLOR_16:
213			case V_028C70_COLOR_16_16:
214				/* For 1-channel formats, use the superset thereof. */
215				if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
216				    spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
217				    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
218				    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
219					if (swap == V_0280A0_SWAP_STD ||
220					    swap == V_0280A0_SWAP_STD_REV)
221						sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
222					else
223						sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
224				}
225				break;
226
227			case V_028C70_COLOR_10_11_11:
228				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
229					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
230					sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
231				}
232				break;
233
234			case V_028C70_COLOR_2_10_10_10:
235				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
236					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
237					sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
238				}
239				break;
240			}
241		}
242
243		if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) {
244			sx_ps_downconvert = 0;
245			sx_blend_opt_epsilon = 0;
246			sx_blend_opt_control = 0;
247		}
248
249		radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
250		radeon_emit(cs, sx_ps_downconvert);	/* R_028754_SX_PS_DOWNCONVERT */
251		radeon_emit(cs, sx_blend_opt_epsilon);	/* R_028758_SX_BLEND_OPT_EPSILON */
252		radeon_emit(cs, sx_blend_opt_control);	/* R_02875C_SX_BLEND_OPT_CONTROL */
253	}
254}
255
256/*
257 * Blender functions
258 */
259
260static uint32_t si_translate_blend_function(int blend_func)
261{
262	switch (blend_func) {
263	case PIPE_BLEND_ADD:
264		return V_028780_COMB_DST_PLUS_SRC;
265	case PIPE_BLEND_SUBTRACT:
266		return V_028780_COMB_SRC_MINUS_DST;
267	case PIPE_BLEND_REVERSE_SUBTRACT:
268		return V_028780_COMB_DST_MINUS_SRC;
269	case PIPE_BLEND_MIN:
270		return V_028780_COMB_MIN_DST_SRC;
271	case PIPE_BLEND_MAX:
272		return V_028780_COMB_MAX_DST_SRC;
273	default:
274		R600_ERR("Unknown blend function %d\n", blend_func);
275		assert(0);
276		break;
277	}
278	return 0;
279}
280
281static uint32_t si_translate_blend_factor(int blend_fact)
282{
283	switch (blend_fact) {
284	case PIPE_BLENDFACTOR_ONE:
285		return V_028780_BLEND_ONE;
286	case PIPE_BLENDFACTOR_SRC_COLOR:
287		return V_028780_BLEND_SRC_COLOR;
288	case PIPE_BLENDFACTOR_SRC_ALPHA:
289		return V_028780_BLEND_SRC_ALPHA;
290	case PIPE_BLENDFACTOR_DST_ALPHA:
291		return V_028780_BLEND_DST_ALPHA;
292	case PIPE_BLENDFACTOR_DST_COLOR:
293		return V_028780_BLEND_DST_COLOR;
294	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
295		return V_028780_BLEND_SRC_ALPHA_SATURATE;
296	case PIPE_BLENDFACTOR_CONST_COLOR:
297		return V_028780_BLEND_CONSTANT_COLOR;
298	case PIPE_BLENDFACTOR_CONST_ALPHA:
299		return V_028780_BLEND_CONSTANT_ALPHA;
300	case PIPE_BLENDFACTOR_ZERO:
301		return V_028780_BLEND_ZERO;
302	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
303		return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
304	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
305		return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
306	case PIPE_BLENDFACTOR_INV_DST_ALPHA:
307		return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
308	case PIPE_BLENDFACTOR_INV_DST_COLOR:
309		return V_028780_BLEND_ONE_MINUS_DST_COLOR;
310	case PIPE_BLENDFACTOR_INV_CONST_COLOR:
311		return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
312	case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
313		return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
314	case PIPE_BLENDFACTOR_SRC1_COLOR:
315		return V_028780_BLEND_SRC1_COLOR;
316	case PIPE_BLENDFACTOR_SRC1_ALPHA:
317		return V_028780_BLEND_SRC1_ALPHA;
318	case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
319		return V_028780_BLEND_INV_SRC1_COLOR;
320	case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
321		return V_028780_BLEND_INV_SRC1_ALPHA;
322	default:
323		R600_ERR("Bad blend factor %d not supported!\n", blend_fact);
324		assert(0);
325		break;
326	}
327	return 0;
328}
329
330static uint32_t si_translate_blend_opt_function(int blend_func)
331{
332	switch (blend_func) {
333	case PIPE_BLEND_ADD:
334		return V_028760_OPT_COMB_ADD;
335	case PIPE_BLEND_SUBTRACT:
336		return V_028760_OPT_COMB_SUBTRACT;
337	case PIPE_BLEND_REVERSE_SUBTRACT:
338		return V_028760_OPT_COMB_REVSUBTRACT;
339	case PIPE_BLEND_MIN:
340		return V_028760_OPT_COMB_MIN;
341	case PIPE_BLEND_MAX:
342		return V_028760_OPT_COMB_MAX;
343	default:
344		return V_028760_OPT_COMB_BLEND_DISABLED;
345	}
346}
347
348static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
349{
350	switch (blend_fact) {
351	case PIPE_BLENDFACTOR_ZERO:
352		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
353	case PIPE_BLENDFACTOR_ONE:
354		return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
355	case PIPE_BLENDFACTOR_SRC_COLOR:
356		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
357				: V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
358	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
359		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
360				: V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
361	case PIPE_BLENDFACTOR_SRC_ALPHA:
362		return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
363	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
364		return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
365	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
366		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
367				: V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
368	default:
369		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
370	}
371}
372
373/**
374 * Get rid of DST in the blend factors by commuting the operands:
375 *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
376 */
377static void si_blend_remove_dst(unsigned *func, unsigned *src_factor,
378				unsigned *dst_factor, unsigned expected_dst,
379				unsigned replacement_src)
380{
381	if (*src_factor == expected_dst &&
382	    *dst_factor == PIPE_BLENDFACTOR_ZERO) {
383		*src_factor = PIPE_BLENDFACTOR_ZERO;
384		*dst_factor = replacement_src;
385
386		/* Commuting the operands requires reversing subtractions. */
387		if (*func == PIPE_BLEND_SUBTRACT)
388			*func = PIPE_BLEND_REVERSE_SUBTRACT;
389		else if (*func == PIPE_BLEND_REVERSE_SUBTRACT)
390			*func = PIPE_BLEND_SUBTRACT;
391	}
392}
393
394static bool si_blend_factor_uses_dst(unsigned factor)
395{
396	return factor == PIPE_BLENDFACTOR_DST_COLOR ||
397		factor == PIPE_BLENDFACTOR_DST_ALPHA ||
398		factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
399		factor == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
400		factor == PIPE_BLENDFACTOR_INV_DST_COLOR;
401}
402
403static void *si_create_blend_state_mode(struct pipe_context *ctx,
404					const struct pipe_blend_state *state,
405					unsigned mode)
406{
407	struct si_context *sctx = (struct si_context*)ctx;
408	struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
409	struct si_pm4_state *pm4 = &blend->pm4;
410	uint32_t sx_mrt_blend_opt[8] = {0};
411	uint32_t color_control = 0;
412
413	if (!blend)
414		return NULL;
415
416	blend->alpha_to_coverage = state->alpha_to_coverage;
417	blend->alpha_to_one = state->alpha_to_one;
418	blend->dual_src_blend = util_blend_state_is_dual(state, 0);
419
420	if (state->logicop_enable) {
421		color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
422	} else {
423		color_control |= S_028808_ROP3(0xcc);
424	}
425
426	si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
427		       S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
428		       S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
429		       S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
430		       S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
431		       S_028B70_ALPHA_TO_MASK_OFFSET3(2));
432
433	if (state->alpha_to_coverage)
434		blend->need_src_alpha_4bit |= 0xf;
435
436	blend->cb_target_mask = 0;
437	for (int i = 0; i < 8; i++) {
438		/* state->rt entries > 0 only written if independent blending */
439		const int j = state->independent_blend_enable ? i : 0;
440
441		unsigned eqRGB = state->rt[j].rgb_func;
442		unsigned srcRGB = state->rt[j].rgb_src_factor;
443		unsigned dstRGB = state->rt[j].rgb_dst_factor;
444		unsigned eqA = state->rt[j].alpha_func;
445		unsigned srcA = state->rt[j].alpha_src_factor;
446		unsigned dstA = state->rt[j].alpha_dst_factor;
447
448		unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
449		unsigned blend_cntl = 0;
450
451		sx_mrt_blend_opt[i] =
452			S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
453			S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
454
455		/* Only set dual source blending for MRT0 to avoid a hang. */
456		if (i >= 1 && blend->dual_src_blend) {
457			/* Vulkan does this for dual source blending. */
458			if (i == 1)
459				blend_cntl |= S_028780_ENABLE(1);
460
461			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
462			continue;
463		}
464
465		/* Only addition and subtraction equations are supported with
466		 * dual source blending.
467		 */
468		if (blend->dual_src_blend &&
469		    (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||
470		     eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {
471			assert(!"Unsupported equation for dual source blending");
472			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
473			continue;
474		}
475
476		/* cb_render_state will disable unused ones */
477		blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);
478
479		if (!state->rt[j].colormask || !state->rt[j].blend_enable) {
480			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
481			continue;
482		}
483
484		/* Blending optimizations for Stoney.
485		 * These transformations don't change the behavior.
486		 *
487		 * First, get rid of DST in the blend factors:
488		 *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
489		 */
490		si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB,
491				    PIPE_BLENDFACTOR_DST_COLOR,
492				    PIPE_BLENDFACTOR_SRC_COLOR);
493		si_blend_remove_dst(&eqA, &srcA, &dstA,
494				    PIPE_BLENDFACTOR_DST_COLOR,
495				    PIPE_BLENDFACTOR_SRC_COLOR);
496		si_blend_remove_dst(&eqA, &srcA, &dstA,
497				    PIPE_BLENDFACTOR_DST_ALPHA,
498				    PIPE_BLENDFACTOR_SRC_ALPHA);
499
500		/* Look up the ideal settings from tables. */
501		srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
502		dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
503		srcA_opt = si_translate_blend_opt_factor(srcA, true);
504		dstA_opt = si_translate_blend_opt_factor(dstA, true);
505
506		/* Handle interdependencies. */
507		if (si_blend_factor_uses_dst(srcRGB))
508			dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
509		if (si_blend_factor_uses_dst(srcA))
510			dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
511
512		if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
513		    (dstRGB == PIPE_BLENDFACTOR_ZERO ||
514		     dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
515		     dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE))
516			dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
517
518		/* Set the final value. */
519		sx_mrt_blend_opt[i] =
520			S_028760_COLOR_SRC_OPT(srcRGB_opt) |
521			S_028760_COLOR_DST_OPT(dstRGB_opt) |
522			S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
523			S_028760_ALPHA_SRC_OPT(srcA_opt) |
524			S_028760_ALPHA_DST_OPT(dstA_opt) |
525			S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
526
527		/* Set blend state. */
528		blend_cntl |= S_028780_ENABLE(1);
529		blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
530		blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
531		blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
532
533		if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
534			blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
535			blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
536			blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
537			blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
538		}
539		si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
540
541		blend->blend_enable_4bit |= 0xfu << (i * 4);
542
543		/* This is only important for formats without alpha. */
544		if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
545		    dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
546		    srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
547		    dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
548		    srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
549		    dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
550			blend->need_src_alpha_4bit |= 0xfu << (i * 4);
551	}
552
553	if (blend->cb_target_mask) {
554		color_control |= S_028808_MODE(mode);
555	} else {
556		color_control |= S_028808_MODE(V_028808_CB_DISABLE);
557	}
558
559	if (sctx->b.family == CHIP_STONEY) {
560		/* Disable RB+ blend optimizations for dual source blending.
561		 * Vulkan does this.
562		 */
563		if (blend->dual_src_blend) {
564			for (int i = 0; i < 8; i++) {
565				sx_mrt_blend_opt[i] =
566					S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
567					S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
568			}
569		}
570
571		for (int i = 0; i < 8; i++)
572			si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
573				       sx_mrt_blend_opt[i]);
574
575		/* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */
576		if (blend->dual_src_blend || state->logicop_enable ||
577		    mode == V_028808_CB_RESOLVE)
578			color_control |= S_028808_DISABLE_DUAL_QUAD(1);
579	}
580
581	si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
582	return blend;
583}
584
/* pipe_context::create_blend_state hook: create a blend CSO for the
 * normal (non-resolve) CB mode. */
static void *si_create_blend_state(struct pipe_context *ctx,
				   const struct pipe_blend_state *state)
{
	return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
}
590
/* Bind a blend CSO.  CB_TARGET_MASK depends on the blend state, so the
 * cb_render_state atom is re-emitted; shaders may need recompiling
 * (e.g. for dual-source blending), hence do_update_shaders. */
static void si_bind_blend_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
	sctx->do_update_shaders = true;
}
598
/* Destroy a blend CSO (deferred if it is currently queued/emitted). */
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
}
604
/* Set the constant blend color; the atom is only dirtied when the color
 * actually changes. */
static void si_set_blend_color(struct pipe_context *ctx,
			       const struct pipe_blend_color *state)
{
	struct si_context *sctx = (struct si_context *)ctx;

	/* No-op if the color is unchanged. */
	if (memcmp(&sctx->blend_color.state, state, sizeof(*state)) == 0)
		return;

	sctx->blend_color.state = *state;
	si_mark_atom_dirty(sctx, &sctx->blend_color.atom);
}
616
/* Emit the 4 consecutive CB_BLEND_{RED,GREEN,BLUE,ALPHA} registers from
 * the cached blend color. */
static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;

	radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
	radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
}
624
625/*
626 * Clipping
627 */
628
/* Set the user clip planes.  Besides dirtying the clip_state atom, the
 * planes are also uploaded as a constant buffer so shaders can read them. */
static void si_set_clip_state(struct pipe_context *ctx,
			      const struct pipe_clip_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_constant_buffer cb;

	/* No-op if the clip planes are unchanged. */
	if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0)
		return;

	sctx->clip_state.state = *state;
	si_mark_atom_dirty(sctx, &sctx->clip_state.atom);

	/* Upload the 8 vec4 user clip planes (4*4*8 bytes) as a user
	 * constant buffer for the VS. */
	cb.buffer = NULL;
	cb.user_buffer = state->ucp;
	cb.buffer_offset = 0;
	cb.buffer_size = 4*4*8;
	si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb);
	pipe_resource_reference(&cb.buffer, NULL);
}
648
/* Emit the 6 user clip planes (6 planes * 4 dwords each) starting at
 * PA_CL_UCP_0_X. */
static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;

	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
	radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
}
656
657#define SIX_BITS 0x3F
658
/* Emit clip/cull related registers derived from the current hardware VS
 * (could be VS, TES or the GS copy shader) and rasterizer state. */
static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct si_shader *vs = si_get_vs_state(sctx);
	struct tgsi_shader_info *info = si_get_vs_info(sctx);
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
	unsigned window_space =
	   info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
	/* If the shader writes gl_ClipVertex, all 6 clip distances are
	 * computed by hardware, hence SIX_BITS. */
	unsigned clipdist_mask =
		info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask;
	/* User clip planes are only used when the shader writes no clip
	 * distances of its own. */
	unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS;
	/* Cull distances follow the clip distances in the output vectors. */
	unsigned culldist_mask = info->culldist_writemask << info->num_written_clipdistance;
	unsigned total_mask;
	bool misc_vec_ena;

	if (vs->key.opt.hw_vs.clip_disable) {
		assert(!info->culldist_writemask);
		clipdist_mask = 0;
		culldist_mask = 0;
	}
	total_mask = clipdist_mask | culldist_mask;

	/* Clip distances on points have no effect, so need to be implemented
	 * as cull distances. This applies for the clipvertex case as well.
	 *
	 * Setting this for primitives other than points should have no adverse
	 * effects.
	 */
	clipdist_mask &= rs->clip_plane_enable;
	culldist_mask |= clipdist_mask;

	misc_vec_ena = info->writes_psize || info->writes_edgeflag ||
		       info->writes_layer || info->writes_viewport_index;

	radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
		S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
		S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
		S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
	        S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
		S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
		S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
		S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
		S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
		clipdist_mask | (culldist_mask << 8));
	radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
		rs->pa_cl_clip_cntl |
		ucp_mask |
		S_028810_CLIP_DISABLE(window_space));

	/* reuse needs to be set off if we write oViewport */
	radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
			       S_028AB4_REUSE_OFF(info->writes_viewport_index));
}
712
713/*
714 * inferred state between framebuffer and rasterizer
715 */
716static void si_update_poly_offset_state(struct si_context *sctx)
717{
718	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
719
720	if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
721		return;
722
723	/* Use the user format, not db_render_format, so that the polygon
724	 * offset behaves as expected by applications.
725	 */
726	switch (sctx->framebuffer.state.zsbuf->texture->format) {
727	case PIPE_FORMAT_Z16_UNORM:
728		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
729		break;
730	default: /* 24-bit */
731		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]);
732		break;
733	case PIPE_FORMAT_Z32_FLOAT:
734	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
735		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]);
736		break;
737	}
738}
739
740/*
741 * Rasterizer
742 */
743
744static uint32_t si_translate_fill(uint32_t func)
745{
746	switch(func) {
747	case PIPE_POLYGON_MODE_FILL:
748		return V_028814_X_DRAW_TRIANGLES;
749	case PIPE_POLYGON_MODE_LINE:
750		return V_028814_X_DRAW_LINES;
751	case PIPE_POLYGON_MODE_POINT:
752		return V_028814_X_DRAW_POINTS;
753	default:
754		assert(0);
755		return V_028814_X_DRAW_POINTS;
756	}
757}
758
759static void *si_create_rs_state(struct pipe_context *ctx,
760				const struct pipe_rasterizer_state *state)
761{
762	struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
763	struct si_pm4_state *pm4 = &rs->pm4;
764	unsigned tmp, i;
765	float psize_min, psize_max;
766
767	if (!rs) {
768		return NULL;
769	}
770
771	rs->scissor_enable = state->scissor;
772	rs->clip_halfz = state->clip_halfz;
773	rs->two_side = state->light_twoside;
774	rs->multisample_enable = state->multisample;
775	rs->force_persample_interp = state->force_persample_interp;
776	rs->clip_plane_enable = state->clip_plane_enable;
777	rs->line_stipple_enable = state->line_stipple_enable;
778	rs->poly_stipple_enable = state->poly_stipple_enable;
779	rs->line_smooth = state->line_smooth;
780	rs->poly_smooth = state->poly_smooth;
781	rs->uses_poly_offset = state->offset_point || state->offset_line ||
782			       state->offset_tri;
783	rs->clamp_fragment_color = state->clamp_fragment_color;
784	rs->flatshade = state->flatshade;
785	rs->sprite_coord_enable = state->sprite_coord_enable;
786	rs->rasterizer_discard = state->rasterizer_discard;
787	rs->pa_sc_line_stipple = state->line_stipple_enable ?
788				S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
789				S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
790	rs->pa_cl_clip_cntl =
791		S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
792		S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
793		S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
794		S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
795		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
796
797	si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
798		S_0286D4_FLAT_SHADE_ENA(1) |
799		S_0286D4_PNT_SPRITE_ENA(1) |
800		S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
801		S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
802		S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
803		S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
804		S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT));
805
806	/* point size 12.4 fixed point */
807	tmp = (unsigned)(state->point_size * 8.0);
808	si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
809
810	if (state->point_size_per_vertex) {
811		psize_min = util_get_min_point_size(state);
812		psize_max = 8192;
813	} else {
814		/* Force the point size to be as if the vertex output was disabled. */
815		psize_min = state->point_size;
816		psize_max = state->point_size;
817	}
818	/* Divide by two, because 0.5 = 1 pixel. */
819	si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
820			S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) |
821			S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2)));
822
823	tmp = (unsigned)state->line_width * 8;
824	si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp));
825	si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
826		       S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
827		       S_028A48_MSAA_ENABLE(state->multisample ||
828					    state->poly_smooth ||
829					    state->line_smooth) |
830		       S_028A48_VPORT_SCISSOR_ENABLE(1));
831
832	si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
833		       S_028BE4_PIX_CENTER(state->half_pixel_center) |
834		       S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
835
836	si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
837	si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL,
838		S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
839		S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
840		S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
841		S_028814_FACE(!state->front_ccw) |
842		S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
843		S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
844		S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
845		S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
846				   state->fill_back != PIPE_POLYGON_MODE_FILL) |
847		S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
848		S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));
849	si_pm4_set_reg(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0 +
850		       SI_SGPR_VS_STATE_BITS * 4, state->clamp_vertex_color);
851
852	/* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */
853	for (i = 0; i < 3; i++) {
854		struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i];
855		float offset_units = state->offset_units;
856		float offset_scale = state->offset_scale * 16.0f;
857		uint32_t pa_su_poly_offset_db_fmt_cntl = 0;
858
859		if (!state->offset_units_unscaled) {
860			switch (i) {
861			case 0: /* 16-bit zbuffer */
862				offset_units *= 4.0f;
863				pa_su_poly_offset_db_fmt_cntl =
864					S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
865				break;
866			case 1: /* 24-bit zbuffer */
867				offset_units *= 2.0f;
868				pa_su_poly_offset_db_fmt_cntl =
869					S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
870				break;
871			case 2: /* 32-bit zbuffer */
872				offset_units *= 1.0f;
873				pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
874								S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
875				break;
876			}
877		}
878
879		si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
880			       fui(offset_scale));
881		si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
882			       fui(offset_units));
883		si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
884			       fui(offset_scale));
885		si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
886			       fui(offset_units));
887		si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
888			       pa_su_poly_offset_db_fmt_cntl);
889	}
890
891	return rs;
892}
893
894static void si_bind_rs_state(struct pipe_context *ctx, void *state)
895{
896	struct si_context *sctx = (struct si_context *)ctx;
897	struct si_state_rasterizer *old_rs =
898		(struct si_state_rasterizer*)sctx->queued.named.rasterizer;
899	struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
900
901	if (!state)
902		return;
903
904	if (!old_rs || old_rs->multisample_enable != rs->multisample_enable) {
905		si_mark_atom_dirty(sctx, &sctx->db_render_state);
906
907		/* Update the small primitive filter workaround if necessary. */
908		if (sctx->b.family >= CHIP_POLARIS10 &&
909		    sctx->framebuffer.nr_samples > 1)
910			si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
911	}
912
913	r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz);
914
915	si_pm4_bind_state(sctx, rasterizer, rs);
916	si_update_poly_offset_state(sctx);
917
918	si_mark_atom_dirty(sctx, &sctx->clip_regs);
919	sctx->do_update_shaders = true;
920}
921
922static void si_delete_rs_state(struct pipe_context *ctx, void *state)
923{
924	struct si_context *sctx = (struct si_context *)ctx;
925
926	if (sctx->queued.named.rasterizer == state)
927		si_pm4_bind_state(sctx, poly_offset, NULL);
928	si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
929}
930
931/*
932 * infeered state between dsa and stencil ref
933 */
static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	/* Reference values come from the dynamic stencil-ref state;
	 * value/write masks come from the bound DSA state. */
	struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
	struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;

	/* Two consecutive registers: DB_STENCILREFMASK (front faces, 0x28430)
	 * followed by DB_STENCILREFMASK_BF (back faces, 0x28434). */
	radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
	radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
			S_028430_STENCILMASK(dsa->valuemask[0]) |
			S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
			S_028430_STENCILOPVAL(1));
	radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
			S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
			S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
			S_028434_STENCILOPVAL_BF(1));
}
950
951static void si_set_stencil_ref(struct pipe_context *ctx,
952			       const struct pipe_stencil_ref *state)
953{
954        struct si_context *sctx = (struct si_context *)ctx;
955
956	if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0)
957		return;
958
959	sctx->stencil_ref.state = *state;
960	si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
961}
962
963
964/*
965 * DSA
966 */
967
968static uint32_t si_translate_stencil_op(int s_op)
969{
970	switch (s_op) {
971	case PIPE_STENCIL_OP_KEEP:
972		return V_02842C_STENCIL_KEEP;
973	case PIPE_STENCIL_OP_ZERO:
974		return V_02842C_STENCIL_ZERO;
975	case PIPE_STENCIL_OP_REPLACE:
976		return V_02842C_STENCIL_REPLACE_TEST;
977	case PIPE_STENCIL_OP_INCR:
978		return V_02842C_STENCIL_ADD_CLAMP;
979	case PIPE_STENCIL_OP_DECR:
980		return V_02842C_STENCIL_SUB_CLAMP;
981	case PIPE_STENCIL_OP_INCR_WRAP:
982		return V_02842C_STENCIL_ADD_WRAP;
983	case PIPE_STENCIL_OP_DECR_WRAP:
984		return V_02842C_STENCIL_SUB_WRAP;
985	case PIPE_STENCIL_OP_INVERT:
986		return V_02842C_STENCIL_INVERT;
987	default:
988		R600_ERR("Unknown stencil op %d", s_op);
989		assert(0);
990		break;
991	}
992	return 0;
993}
994
995static void *si_create_dsa_state(struct pipe_context *ctx,
996				 const struct pipe_depth_stencil_alpha_state *state)
997{
998	struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
999	struct si_pm4_state *pm4 = &dsa->pm4;
1000	unsigned db_depth_control;
1001	uint32_t db_stencil_control = 0;
1002
1003	if (!dsa) {
1004		return NULL;
1005	}
1006
1007	dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask;
1008	dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask;
1009	dsa->stencil_ref.writemask[0] = state->stencil[0].writemask;
1010	dsa->stencil_ref.writemask[1] = state->stencil[1].writemask;
1011
1012	db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
1013		S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
1014		S_028800_ZFUNC(state->depth.func) |
1015		S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test);
1016
1017	/* stencil */
1018	if (state->stencil[0].enabled) {
1019		db_depth_control |= S_028800_STENCIL_ENABLE(1);
1020		db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
1021		db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
1022		db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
1023		db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
1024
1025		if (state->stencil[1].enabled) {
1026			db_depth_control |= S_028800_BACKFACE_ENABLE(1);
1027			db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
1028			db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
1029			db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
1030			db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
1031		}
1032	}
1033
1034	/* alpha */
1035	if (state->alpha.enabled) {
1036		dsa->alpha_func = state->alpha.func;
1037
1038		si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
1039		               SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value));
1040	} else {
1041		dsa->alpha_func = PIPE_FUNC_ALWAYS;
1042	}
1043
1044	si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
1045	si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
1046	if (state->depth.bounds_test) {
1047		si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min));
1048		si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max));
1049	}
1050
1051	return dsa;
1052}
1053
1054static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
1055{
1056        struct si_context *sctx = (struct si_context *)ctx;
1057        struct si_state_dsa *dsa = state;
1058
1059        if (!state)
1060                return;
1061
1062	si_pm4_bind_state(sctx, dsa, dsa);
1063
1064	if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
1065		   sizeof(struct si_dsa_stencil_ref_part)) != 0) {
1066		sctx->stencil_ref.dsa_part = dsa->stencil_ref;
1067		si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
1068	}
1069	sctx->do_update_shaders = true;
1070}
1071
1072static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
1073{
1074	struct si_context *sctx = (struct si_context *)ctx;
1075	si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
1076}
1077
1078static void *si_create_db_flush_dsa(struct si_context *sctx)
1079{
1080	struct pipe_depth_stencil_alpha_state dsa = {};
1081
1082	return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
1083}
1084
1085/* DB RENDER STATE */
1086
1087static void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
1088{
1089	struct si_context *sctx = (struct si_context*)ctx;
1090
1091	/* Pipeline stat & streamout queries. */
1092	if (enable) {
1093		sctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS;
1094		sctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS;
1095	} else {
1096		sctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS;
1097		sctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS;
1098	}
1099
1100	/* Occlusion queries. */
1101	if (sctx->occlusion_queries_disabled != !enable) {
1102		sctx->occlusion_queries_disabled = !enable;
1103		si_mark_atom_dirty(sctx, &sctx->db_render_state);
1104	}
1105}
1106
1107static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
1108{
1109	struct si_context *sctx = (struct si_context*)ctx;
1110
1111	si_mark_atom_dirty(sctx, &sctx->db_render_state);
1112}
1113
/* Save compute-dispatch state into *st so the caller can restore it later.
 * NOTE(review): presumably the query-buffer-object result code dispatches a
 * compute shader that clobbers const buffer 0 and SSBO slots 0..2 — confirm
 * against the caller in ../radeon. */
static void si_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st)
{
	struct si_context *sctx = (struct si_context*)ctx;

	st->saved_compute = sctx->cs_shader_state.program;

	/* Snapshot constant buffer 0 and shader buffers 0..2 of the compute stage. */
	si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
	si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
}
1123
/* Emit DB_RENDER_CONTROL, DB_COUNT_CONTROL, DB_RENDER_OVERRIDE2 and
 * DB_SHADER_CONTROL derived from the current context state. */
static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
	unsigned db_shader_control;

	/* Two consecutive registers: DB_RENDER_CONTROL then DB_COUNT_CONTROL. */
	radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);

	/* DB_RENDER_CONTROL */
	if (sctx->dbcb_depth_copy_enabled ||
	    sctx->dbcb_stencil_copy_enabled) {
		/* Depth/stencil copy (decompression blit) in progress. */
		radeon_emit(cs,
			    S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
			    S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
			    S_028000_COPY_CENTROID(1) |
			    S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
	} else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
		/* In-place flush: write out without compression. */
		radeon_emit(cs,
			    S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
			    S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
	} else {
		/* Normal rendering, possibly with fast clears enabled. */
		radeon_emit(cs,
			    S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
			    S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
	}

	/* DB_COUNT_CONTROL (occlusion queries) */
	if (sctx->b.num_occlusion_queries > 0 &&
	    !sctx->occlusion_queries_disabled) {
		bool perfect = sctx->b.num_perfect_occlusion_queries > 0;

		if (sctx->b.chip_class >= CIK) {
			radeon_emit(cs,
				    S_028004_PERFECT_ZPASS_COUNTS(perfect) |
				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
				    S_028004_ZPASS_ENABLE(1) |
				    S_028004_SLICE_EVEN_ENABLE(1) |
				    S_028004_SLICE_ODD_ENABLE(1));
		} else {
			radeon_emit(cs,
				    S_028004_PERFECT_ZPASS_COUNTS(perfect) |
				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
		}
	} else {
		/* Disable occlusion queries. */
		if (sctx->b.chip_class >= CIK) {
			radeon_emit(cs, 0);
		} else {
			radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
		}
	}

	/* DB_RENDER_OVERRIDE2 */
	radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
		S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
		S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
		S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));

	/* DB_SHADER_CONTROL: start from the value derived from the bound PS. */
	db_shader_control = sctx->ps_db_shader_control;

	/* Bug workaround for smoothing (overrasterization) on SI. */
	if (sctx->b.chip_class == SI && sctx->smoothing_enabled) {
		db_shader_control &= C_02880C_Z_ORDER;
		db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
	}

	/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
	if (!rs || !rs->multisample_enable)
		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;

	/* NOTE(review): looks like a Stoney tweak tied to RB+ being disabled
	 * via the debug flag — confirm against hw docs. */
	if (sctx->b.family == CHIP_STONEY &&
	    sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)
		db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);

	radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
			       db_shader_control);
}
1201
1202/*
1203 * format translation
1204 */
/* Translate a gallium format to the CB_COLOR*_INFO.FORMAT encoding.
 * Returns V_028C70_COLOR_INVALID if the format cannot be a color buffer. */
static uint32_t si_translate_colorformat(enum pipe_format format)
{
	const struct util_format_description *desc = util_format_description(format);

/* NOTE(review): this macro stays defined for the rest of the translation
 * unit (no #undef) — keep that in mind when adding code below. */
#define HAS_SIZE(x,y,z,w) \
	(desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
         desc->channel[2].size == (z) && desc->channel[3].size == (w))

	if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
		return V_028C70_COLOR_10_11_11;

	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
		return V_028C70_COLOR_INVALID;

	/* hw cannot support mixed formats (except depth/stencil, since
	 * stencil is not written to). */
	if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
		return V_028C70_COLOR_INVALID;

	switch (desc->nr_channels) {
	case 1:
		switch (desc->channel[0].size) {
		case 8:
			return V_028C70_COLOR_8;
		case 16:
			return V_028C70_COLOR_16;
		case 32:
			return V_028C70_COLOR_32;
		}
		break;
	case 2:
		if (desc->channel[0].size == desc->channel[1].size) {
			switch (desc->channel[0].size) {
			case 8:
				return V_028C70_COLOR_8_8;
			case 16:
				return V_028C70_COLOR_16_16;
			case 32:
				return V_028C70_COLOR_32_32;
			}
		} else if (HAS_SIZE(8,24,0,0)) {
			return V_028C70_COLOR_24_8;
		} else if (HAS_SIZE(24,8,0,0)) {
			return V_028C70_COLOR_8_24;
		}
		break;
	case 3:
		if (HAS_SIZE(5,6,5,0)) {
			return V_028C70_COLOR_5_6_5;
		} else if (HAS_SIZE(32,8,24,0)) {
			return V_028C70_COLOR_X24_8_32_FLOAT;
		}
		break;
	case 4:
		/* Uniform channel sizes first, then the packed special cases. */
		if (desc->channel[0].size == desc->channel[1].size &&
		    desc->channel[0].size == desc->channel[2].size &&
		    desc->channel[0].size == desc->channel[3].size) {
			switch (desc->channel[0].size) {
			case 4:
				return V_028C70_COLOR_4_4_4_4;
			case 8:
				return V_028C70_COLOR_8_8_8_8;
			case 16:
				return V_028C70_COLOR_16_16_16_16;
			case 32:
				return V_028C70_COLOR_32_32_32_32;
			}
		} else if (HAS_SIZE(5,5,5,1)) {
			return V_028C70_COLOR_1_5_5_5;
		} else if (HAS_SIZE(10,10,10,2)) {
			return V_028C70_COLOR_2_10_10_10;
		}
		break;
	}
	return V_028C70_COLOR_INVALID;
}
1281
1282static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
1283{
1284	if (SI_BIG_ENDIAN) {
1285		switch(colorformat) {
1286		/* 8-bit buffers. */
1287		case V_028C70_COLOR_8:
1288			return V_028C70_ENDIAN_NONE;
1289
1290		/* 16-bit buffers. */
1291		case V_028C70_COLOR_5_6_5:
1292		case V_028C70_COLOR_1_5_5_5:
1293		case V_028C70_COLOR_4_4_4_4:
1294		case V_028C70_COLOR_16:
1295		case V_028C70_COLOR_8_8:
1296			return V_028C70_ENDIAN_8IN16;
1297
1298		/* 32-bit buffers. */
1299		case V_028C70_COLOR_8_8_8_8:
1300		case V_028C70_COLOR_2_10_10_10:
1301		case V_028C70_COLOR_8_24:
1302		case V_028C70_COLOR_24_8:
1303		case V_028C70_COLOR_16_16:
1304			return V_028C70_ENDIAN_8IN32;
1305
1306		/* 64-bit buffers. */
1307		case V_028C70_COLOR_16_16_16_16:
1308			return V_028C70_ENDIAN_8IN16;
1309
1310		case V_028C70_COLOR_32_32:
1311			return V_028C70_ENDIAN_8IN32;
1312
1313		/* 128-bit buffers. */
1314		case V_028C70_COLOR_32_32_32_32:
1315			return V_028C70_ENDIAN_8IN32;
1316		default:
1317			return V_028C70_ENDIAN_NONE; /* Unsupported. */
1318		}
1319	} else {
1320		return V_028C70_ENDIAN_NONE;
1321	}
1322}
1323
1324static uint32_t si_translate_dbformat(enum pipe_format format)
1325{
1326	switch (format) {
1327	case PIPE_FORMAT_Z16_UNORM:
1328		return V_028040_Z_16;
1329	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1330	case PIPE_FORMAT_X8Z24_UNORM:
1331	case PIPE_FORMAT_Z24X8_UNORM:
1332	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1333		return V_028040_Z_24; /* deprecated on SI */
1334	case PIPE_FORMAT_Z32_FLOAT:
1335	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1336		return V_028040_Z_32_FLOAT;
1337	default:
1338		return V_028040_Z_INVALID;
1339	}
1340}
1341
1342/*
1343 * Texture translation
1344 */
1345
/* Translate a gallium format to the IMG_DATA_FORMAT encoding used in
 * texture resource descriptors.  Returns ~0 if there is no hardware
 * equivalent. */
static uint32_t si_translate_texformat(struct pipe_screen *screen,
				       enum pipe_format format,
				       const struct util_format_description *desc,
				       int first_non_void)
{
	struct si_screen *sscreen = (struct si_screen*)screen;
	/* Compressed formats need a new enough kernel: DRM 2.31+ or any 3.x. */
	bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 &&
					  sscreen->b.info.drm_minor >= 31) ||
					 sscreen->b.info.drm_major == 3;
	bool uniform = true;
	int i;

	/* Colorspace (return non-RGB formats directly). */
	switch (desc->colorspace) {
	/* Depth stencil formats */
	case UTIL_FORMAT_COLORSPACE_ZS:
		switch (format) {
		case PIPE_FORMAT_Z16_UNORM:
			return V_008F14_IMG_DATA_FORMAT_16;
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_Z24X8_UNORM:
		case PIPE_FORMAT_Z24_UNORM_S8_UINT:
			return V_008F14_IMG_DATA_FORMAT_8_24;
		case PIPE_FORMAT_X8Z24_UNORM:
		case PIPE_FORMAT_S8X24_UINT:
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
			return V_008F14_IMG_DATA_FORMAT_24_8;
		case PIPE_FORMAT_S8_UINT:
			return V_008F14_IMG_DATA_FORMAT_8;
		case PIPE_FORMAT_Z32_FLOAT:
			return V_008F14_IMG_DATA_FORMAT_32;
		case PIPE_FORMAT_X32_S8X24_UINT:
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
			return V_008F14_IMG_DATA_FORMAT_X24_8_32;
		default:
			goto out_unknown;
		}

	case UTIL_FORMAT_COLORSPACE_YUV:
		goto out_unknown; /* TODO */

	case UTIL_FORMAT_COLORSPACE_SRGB:
		if (desc->nr_channels != 4 && desc->nr_channels != 1)
			goto out_unknown;
		break;

	default:
		break;
	}

	if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		switch (format) {
		case PIPE_FORMAT_RGTC1_SNORM:
		case PIPE_FORMAT_LATC1_SNORM:
		case PIPE_FORMAT_RGTC1_UNORM:
		case PIPE_FORMAT_LATC1_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BC4;
		case PIPE_FORMAT_RGTC2_SNORM:
		case PIPE_FORMAT_LATC2_SNORM:
		case PIPE_FORMAT_RGTC2_UNORM:
		case PIPE_FORMAT_LATC2_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BC5;
		default:
			goto out_unknown;
		}
	}

	/* ETC2 is only handled natively on Stoney here. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
	    sscreen->b.family == CHIP_STONEY) {
		switch (format) {
		case PIPE_FORMAT_ETC1_RGB8:
		case PIPE_FORMAT_ETC2_RGB8:
		case PIPE_FORMAT_ETC2_SRGB8:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
		case PIPE_FORMAT_ETC2_RGB8A1:
		case PIPE_FORMAT_ETC2_SRGB8A1:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
		case PIPE_FORMAT_ETC2_RGBA8:
		case PIPE_FORMAT_ETC2_SRGBA8:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
		case PIPE_FORMAT_ETC2_R11_UNORM:
		case PIPE_FORMAT_ETC2_R11_SNORM:
			return V_008F14_IMG_DATA_FORMAT_ETC2_R;
		case PIPE_FORMAT_ETC2_RG11_UNORM:
		case PIPE_FORMAT_ETC2_RG11_SNORM:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
		default:
			goto out_unknown;
		}
	}

	if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		switch (format) {
		case PIPE_FORMAT_BPTC_RGBA_UNORM:
		case PIPE_FORMAT_BPTC_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC7;
		case PIPE_FORMAT_BPTC_RGB_FLOAT:
		case PIPE_FORMAT_BPTC_RGB_UFLOAT:
			return V_008F14_IMG_DATA_FORMAT_BC6;
		default:
			goto out_unknown;
		}
	}

	if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
		switch (format) {
		case PIPE_FORMAT_R8G8_B8G8_UNORM:
		case PIPE_FORMAT_G8R8_B8R8_UNORM:
			return V_008F14_IMG_DATA_FORMAT_GB_GR;
		case PIPE_FORMAT_G8R8_G8B8_UNORM:
		case PIPE_FORMAT_R8G8_R8B8_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BG_RG;
		default:
			goto out_unknown;
		}
	}

	if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		/* S3TC support can also be disabled at runtime. */
		if (!util_format_s3tc_enabled) {
			goto out_unknown;
		}

		switch (format) {
		case PIPE_FORMAT_DXT1_RGB:
		case PIPE_FORMAT_DXT1_RGBA:
		case PIPE_FORMAT_DXT1_SRGB:
		case PIPE_FORMAT_DXT1_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC1;
		case PIPE_FORMAT_DXT3_RGBA:
		case PIPE_FORMAT_DXT3_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC2;
		case PIPE_FORMAT_DXT5_RGBA:
		case PIPE_FORMAT_DXT5_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC3;
		default:
			goto out_unknown;
		}
	}

	if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
		return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
	} else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
		return V_008F14_IMG_DATA_FORMAT_10_11_11;
	}

	/* R8G8Bx_SNORM - TODO CxV8U8 */

	/* hw cannot support mixed formats (except depth/stencil, since only
	 * depth is read).*/
	if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
		goto out_unknown;

	/* See whether the components are of the same size. */
	for (i = 1; i < desc->nr_channels; i++) {
		uniform = uniform && desc->channel[0].size == desc->channel[i].size;
	}

	/* Non-uniform formats. */
	if (!uniform) {
		switch(desc->nr_channels) {
		case 3:
			if (desc->channel[0].size == 5 &&
			    desc->channel[1].size == 6 &&
			    desc->channel[2].size == 5) {
				return V_008F14_IMG_DATA_FORMAT_5_6_5;
			}
			goto out_unknown;
		case 4:
			if (desc->channel[0].size == 5 &&
			    desc->channel[1].size == 5 &&
			    desc->channel[2].size == 5 &&
			    desc->channel[3].size == 1) {
				return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
			}
			if (desc->channel[0].size == 10 &&
			    desc->channel[1].size == 10 &&
			    desc->channel[2].size == 10 &&
			    desc->channel[3].size == 2) {
				return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
			}
			goto out_unknown;
		}
		goto out_unknown;
	}

	if (first_non_void < 0 || first_non_void > 3)
		goto out_unknown;

	/* uniform formats */
	switch (desc->channel[first_non_void].size) {
	case 4:
		switch (desc->nr_channels) {
#if 0 /* Not supported for render targets */
		case 2:
			return V_008F14_IMG_DATA_FORMAT_4_4;
#endif
		case 4:
			return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
		}
		break;
	case 8:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_8;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_8_8;
		case 4:
			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
		}
		break;
	case 16:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_16;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_16_16;
		case 4:
			return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
		}
		break;
	case 32:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_32;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_32_32;
#if 0 /* Not supported for render targets */
		case 3:
			return V_008F14_IMG_DATA_FORMAT_32_32_32;
#endif
		case 4:
			return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
		}
	}

out_unknown:
	/* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
	return ~0;
}
1594
1595static unsigned si_tex_wrap(unsigned wrap)
1596{
1597	switch (wrap) {
1598	default:
1599	case PIPE_TEX_WRAP_REPEAT:
1600		return V_008F30_SQ_TEX_WRAP;
1601	case PIPE_TEX_WRAP_CLAMP:
1602		return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
1603	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1604		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
1605	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1606		return V_008F30_SQ_TEX_CLAMP_BORDER;
1607	case PIPE_TEX_WRAP_MIRROR_REPEAT:
1608		return V_008F30_SQ_TEX_MIRROR;
1609	case PIPE_TEX_WRAP_MIRROR_CLAMP:
1610		return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
1611	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1612		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
1613	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1614		return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
1615	}
1616}
1617
1618static unsigned si_tex_mipfilter(unsigned filter)
1619{
1620	switch (filter) {
1621	case PIPE_TEX_MIPFILTER_NEAREST:
1622		return V_008F38_SQ_TEX_Z_FILTER_POINT;
1623	case PIPE_TEX_MIPFILTER_LINEAR:
1624		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
1625	default:
1626	case PIPE_TEX_MIPFILTER_NONE:
1627		return V_008F38_SQ_TEX_Z_FILTER_NONE;
1628	}
1629}
1630
1631static unsigned si_tex_compare(unsigned compare)
1632{
1633	switch (compare) {
1634	default:
1635	case PIPE_FUNC_NEVER:
1636		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
1637	case PIPE_FUNC_LESS:
1638		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
1639	case PIPE_FUNC_EQUAL:
1640		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
1641	case PIPE_FUNC_LEQUAL:
1642		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
1643	case PIPE_FUNC_GREATER:
1644		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
1645	case PIPE_FUNC_NOTEQUAL:
1646		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
1647	case PIPE_FUNC_GEQUAL:
1648		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
1649	case PIPE_FUNC_ALWAYS:
1650		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
1651	}
1652}
1653
1654static unsigned si_tex_dim(unsigned res_target, unsigned view_target,
1655			   unsigned nr_samples)
1656{
1657	if (view_target == PIPE_TEXTURE_CUBE ||
1658	    view_target == PIPE_TEXTURE_CUBE_ARRAY)
1659		res_target = view_target;
1660	/* If interpreting cubemaps as something else, set 2D_ARRAY. */
1661	else if (res_target == PIPE_TEXTURE_CUBE ||
1662		 res_target == PIPE_TEXTURE_CUBE_ARRAY)
1663		res_target = PIPE_TEXTURE_2D_ARRAY;
1664
1665	switch (res_target) {
1666	default:
1667	case PIPE_TEXTURE_1D:
1668		return V_008F1C_SQ_RSRC_IMG_1D;
1669	case PIPE_TEXTURE_1D_ARRAY:
1670		return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
1671	case PIPE_TEXTURE_2D:
1672	case PIPE_TEXTURE_RECT:
1673		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
1674					V_008F1C_SQ_RSRC_IMG_2D;
1675	case PIPE_TEXTURE_2D_ARRAY:
1676		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
1677					V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
1678	case PIPE_TEXTURE_3D:
1679		return V_008F1C_SQ_RSRC_IMG_3D;
1680	case PIPE_TEXTURE_CUBE:
1681	case PIPE_TEXTURE_CUBE_ARRAY:
1682		return V_008F1C_SQ_RSRC_IMG_CUBE;
1683	}
1684}
1685
1686/*
1687 * Format support testing
1688 */
1689
1690static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
1691{
1692	return si_translate_texformat(screen, format, util_format_description(format),
1693				      util_format_get_first_non_void_channel(format)) != ~0U;
1694}
1695
/* Translate a vertex/texel-buffer format to the BUF_DATA_FORMAT_* encoding.
 * Returns V_008F0C_BUF_DATA_FORMAT_INVALID if unsupported. */
static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
					       const struct util_format_description *desc,
					       int first_non_void)
{
	int i;

	/* 10_11_11 float is not a plain format; handle it up front. */
	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
		return V_008F0C_BUF_DATA_FORMAT_10_11_11;

	assert(first_non_void >= 0);

	if (desc->nr_channels == 4 &&
	    desc->channel[0].size == 10 &&
	    desc->channel[1].size == 10 &&
	    desc->channel[2].size == 10 &&
	    desc->channel[3].size == 2)
		return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;

	/* See whether the components are of the same size. */
	for (i = 0; i < desc->nr_channels; i++) {
		if (desc->channel[first_non_void].size != desc->channel[i].size)
			return V_008F0C_BUF_DATA_FORMAT_INVALID;
	}

	switch (desc->channel[first_non_void].size) {
	case 8:
		switch (desc->nr_channels) {
		case 1:
			return V_008F0C_BUF_DATA_FORMAT_8;
		case 2:
			return V_008F0C_BUF_DATA_FORMAT_8_8;
		/* No native 8_8_8: 3-channel formats fetch as 4 channels
		 * (see the caveats in si_is_vertex_format_supported). */
		case 3:
		case 4:
			return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
		}
		break;
	case 16:
		switch (desc->nr_channels) {
		case 1:
			return V_008F0C_BUF_DATA_FORMAT_16;
		case 2:
			return V_008F0C_BUF_DATA_FORMAT_16_16;
		/* No native 16_16_16 either; widen to 4 channels. */
		case 3:
		case 4:
			return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
		}
		break;
	case 32:
		switch (desc->nr_channels) {
		case 1:
			return V_008F0C_BUF_DATA_FORMAT_32;
		case 2:
			return V_008F0C_BUF_DATA_FORMAT_32_32;
		case 3:
			return V_008F0C_BUF_DATA_FORMAT_32_32_32;
		case 4:
			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
		}
		break;
	}

	return V_008F0C_BUF_DATA_FORMAT_INVALID;
}
1759
1760static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
1761					      const struct util_format_description *desc,
1762					      int first_non_void)
1763{
1764	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1765		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1766
1767	assert(first_non_void >= 0);
1768
1769	switch (desc->channel[first_non_void].type) {
1770	case UTIL_FORMAT_TYPE_SIGNED:
1771	case UTIL_FORMAT_TYPE_FIXED:
1772		if (desc->channel[first_non_void].size >= 32 ||
1773		    desc->channel[first_non_void].pure_integer)
1774			return V_008F0C_BUF_NUM_FORMAT_SINT;
1775		else if (desc->channel[first_non_void].normalized)
1776			return V_008F0C_BUF_NUM_FORMAT_SNORM;
1777		else
1778			return V_008F0C_BUF_NUM_FORMAT_SSCALED;
1779		break;
1780	case UTIL_FORMAT_TYPE_UNSIGNED:
1781		if (desc->channel[first_non_void].size >= 32 ||
1782		    desc->channel[first_non_void].pure_integer)
1783			return V_008F0C_BUF_NUM_FORMAT_UINT;
1784		else if (desc->channel[first_non_void].normalized)
1785			return V_008F0C_BUF_NUM_FORMAT_UNORM;
1786		else
1787			return V_008F0C_BUF_NUM_FORMAT_USCALED;
1788		break;
1789	case UTIL_FORMAT_TYPE_FLOAT:
1790	default:
1791		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1792	}
1793}
1794
1795static unsigned si_is_vertex_format_supported(struct pipe_screen *screen,
1796					      enum pipe_format format,
1797					      unsigned usage)
1798{
1799	const struct util_format_description *desc;
1800	int first_non_void;
1801	unsigned data_format;
1802
1803	assert((usage & ~(PIPE_BIND_SHADER_IMAGE |
1804			  PIPE_BIND_SAMPLER_VIEW |
1805			  PIPE_BIND_VERTEX_BUFFER)) == 0);
1806
1807	desc = util_format_description(format);
1808
1809	/* There are no native 8_8_8 or 16_16_16 data formats, and we currently
1810	 * select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well
1811	 * for read-only access (with caveats surrounding bounds checks), but
1812	 * obviously fails for write access which we have to implement for
1813	 * shader images. Luckily, OpenGL doesn't expect this to be supported
1814	 * anyway, and so the only impact is on PBO uploads / downloads, which
1815	 * shouldn't be expected to be fast for GL_RGB anyway.
1816	 */
1817	if (desc->block.bits == 3 * 8 ||
1818	    desc->block.bits == 3 * 16) {
1819		if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) {
1820		    usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW);
1821			if (!usage)
1822				return 0;
1823		}
1824	}
1825
1826	first_non_void = util_format_get_first_non_void_channel(format);
1827	data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);
1828	if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID)
1829		return 0;
1830
1831	return usage;
1832}
1833
1834static bool si_is_colorbuffer_format_supported(enum pipe_format format)
1835{
1836	return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
1837		r600_translate_colorswap(format, false) != ~0U;
1838}
1839
1840static bool si_is_zs_format_supported(enum pipe_format format)
1841{
1842	return si_translate_dbformat(format) != V_028040_Z_INVALID;
1843}
1844
1845static boolean si_is_format_supported(struct pipe_screen *screen,
1846				      enum pipe_format format,
1847				      enum pipe_texture_target target,
1848				      unsigned sample_count,
1849				      unsigned usage)
1850{
1851	unsigned retval = 0;
1852
1853	if (target >= PIPE_MAX_TEXTURE_TYPES) {
1854		R600_ERR("r600: unsupported texture type %d\n", target);
1855		return false;
1856	}
1857
1858	if (!util_format_is_supported(format, usage))
1859		return false;
1860
1861	if (sample_count > 1) {
1862		if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
1863			return false;
1864
1865		if (usage & PIPE_BIND_SHADER_IMAGE)
1866			return false;
1867
1868		switch (sample_count) {
1869		case 2:
1870		case 4:
1871		case 8:
1872			break;
1873		case 16:
1874			if (format == PIPE_FORMAT_NONE)
1875				return true;
1876			else
1877				return false;
1878		default:
1879			return false;
1880		}
1881	}
1882
1883	if (usage & (PIPE_BIND_SAMPLER_VIEW |
1884		     PIPE_BIND_SHADER_IMAGE)) {
1885		if (target == PIPE_BUFFER) {
1886			retval |= si_is_vertex_format_supported(
1887				screen, format, usage & (PIPE_BIND_SAMPLER_VIEW |
1888						         PIPE_BIND_SHADER_IMAGE));
1889		} else {
1890			if (si_is_sampler_format_supported(screen, format))
1891				retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
1892						   PIPE_BIND_SHADER_IMAGE);
1893		}
1894	}
1895
1896	if ((usage & (PIPE_BIND_RENDER_TARGET |
1897		      PIPE_BIND_DISPLAY_TARGET |
1898		      PIPE_BIND_SCANOUT |
1899		      PIPE_BIND_SHARED |
1900		      PIPE_BIND_BLENDABLE)) &&
1901	    si_is_colorbuffer_format_supported(format)) {
1902		retval |= usage &
1903			  (PIPE_BIND_RENDER_TARGET |
1904			   PIPE_BIND_DISPLAY_TARGET |
1905			   PIPE_BIND_SCANOUT |
1906			   PIPE_BIND_SHARED);
1907		if (!util_format_is_pure_integer(format) &&
1908		    !util_format_is_depth_or_stencil(format))
1909			retval |= usage & PIPE_BIND_BLENDABLE;
1910	}
1911
1912	if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
1913	    si_is_zs_format_supported(format)) {
1914		retval |= PIPE_BIND_DEPTH_STENCIL;
1915	}
1916
1917	if (usage & PIPE_BIND_VERTEX_BUFFER) {
1918		retval |= si_is_vertex_format_supported(screen, format,
1919							PIPE_BIND_VERTEX_BUFFER);
1920	}
1921
1922	if ((usage & PIPE_BIND_LINEAR) &&
1923	    !util_format_is_compressed(format) &&
1924	    !(usage & PIPE_BIND_DEPTH_STENCIL))
1925		retval |= PIPE_BIND_LINEAR;
1926
1927	return retval == usage;
1928}
1929
1930/*
1931 * framebuffer handling
1932 */
1933
/* Compute the pixel shader color export formats (SPI_SHADER_COL_FORMAT)
 * for one colorbuffer, given its CB hw format, component swap and number
 * type. Four variants with different capabilities are stored in the
 * surface; the shader key code later picks the cheapest one that satisfies
 * the enabled blending / alpha-to-coverage state.
 */
static void si_choose_spi_color_formats(struct r600_surface *surf,
					unsigned format, unsigned swap,
					unsigned ntype, bool is_depth)
{
	/* Alpha is needed for alpha-to-coverage.
	 * Blending may be with or without alpha.
	 */
	unsigned normal = 0; /* most optimal, may not support blending or export alpha */
	unsigned alpha = 0; /* exports alpha, but may not support blending */
	unsigned blend = 0; /* supports blending, but may not export alpha */
	unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */

	/* Choose the SPI color formats. These are required values for Stoney/RB+.
	 * Other chips have multiple choices, though they are not necessarily better.
	 */
	switch (format) {
	/* <= 16 bits per channel: a 16-bit export covers all four variants. */
	case V_028C70_COLOR_5_6_5:
	case V_028C70_COLOR_1_5_5_5:
	case V_028C70_COLOR_5_5_5_1:
	case V_028C70_COLOR_4_4_4_4:
	case V_028C70_COLOR_10_11_11:
	case V_028C70_COLOR_11_11_10:
	case V_028C70_COLOR_8:
	case V_028C70_COLOR_8_8:
	case V_028C70_COLOR_8_8_8_8:
	case V_028C70_COLOR_10_10_10_2:
	case V_028C70_COLOR_2_10_10_10:
		if (ntype == V_028C70_NUMBER_UINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_SINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
		else
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
		break;

	case V_028C70_COLOR_16:
	case V_028C70_COLOR_16_16:
	case V_028C70_COLOR_16_16_16_16:
		if (ntype == V_028C70_NUMBER_UNORM ||
		    ntype == V_028C70_NUMBER_SNORM) {
			/* UNORM16 and SNORM16 don't support blending */
			if (ntype == V_028C70_NUMBER_UNORM)
				normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
			else
				normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;

			/* Use 32 bits per channel for blending. */
			if (format == V_028C70_COLOR_16) {
				if (swap == V_028C70_SWAP_STD) { /* R */
					blend = V_028714_SPI_SHADER_32_R;
					blend_alpha = V_028714_SPI_SHADER_32_AR;
				} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
				else
					assert(0);
			} else if (format == V_028C70_COLOR_16_16) {
				if (swap == V_028C70_SWAP_STD) { /* RG */
					blend = V_028714_SPI_SHADER_32_GR;
					blend_alpha = V_028714_SPI_SHADER_32_ABGR;
				} else if (swap == V_028C70_SWAP_ALT) /* RA */
					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
				else
					assert(0);
			} else /* 16_16_16_16 */
				blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
		} else if (ntype == V_028C70_NUMBER_UINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_SINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_FLOAT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32:
		if (swap == V_028C70_SWAP_STD) { /* R */
			blend = normal = V_028714_SPI_SHADER_32_R;
			alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
		} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32_32:
		if (swap == V_028C70_SWAP_STD) { /* RG */
			blend = normal = V_028714_SPI_SHADER_32_GR;
			alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
		} else if (swap == V_028C70_SWAP_ALT) /* RA */
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32_32_32_32:
	case V_028C70_COLOR_8_24:
	case V_028C70_COLOR_24_8:
	case V_028C70_COLOR_X24_8_32_FLOAT:
		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
		break;

	default:
		assert(0);
		return;
	}

	/* The DB->CB copy needs 32_ABGR. */
	if (is_depth)
		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;

	surf->spi_shader_col_format = normal;
	surf->spi_shader_col_format_alpha = alpha;
	surf->spi_shader_col_format_blend = blend;
	surf->spi_shader_col_format_blend_alpha = blend_alpha;
}
2050
/* One-time initialization of the CB register values (CB_COLOR*_VIEW/INFO/
 * ATTRIB, DCC control) for a color surface. The results are cached in the
 * r600_surface and emitted later by si_emit_framebuffer_state().
 */
static void si_initialize_color_surface(struct si_context *sctx,
					struct r600_surface *surf)
{
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned color_info, color_attrib, color_view;
	unsigned format, swap, ntype, endian;
	const struct util_format_description *desc;
	int i;
	unsigned blend_clamp = 0, blend_bypass = 0;

	color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
		     S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);

	/* Derive the CB number type from the first non-void channel. */
	desc = util_format_description(surf->base.format);
	for (i = 0; i < 4; i++) {
		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
			break;
		}
	}
	if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
		ntype = V_028C70_NUMBER_FLOAT;
	} else {
		ntype = V_028C70_NUMBER_UNORM;
		if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
			ntype = V_028C70_NUMBER_SRGB;
		else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
			if (desc->channel[i].pure_integer) {
				ntype = V_028C70_NUMBER_SINT;
			} else {
				assert(desc->channel[i].normalized);
				ntype = V_028C70_NUMBER_SNORM;
			}
		} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
			if (desc->channel[i].pure_integer) {
				ntype = V_028C70_NUMBER_UINT;
			} else {
				assert(desc->channel[i].normalized);
				ntype = V_028C70_NUMBER_UNORM;
			}
		}
	}

	format = si_translate_colorformat(surf->base.format);
	if (format == V_028C70_COLOR_INVALID) {
		R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
	}
	assert(format != V_028C70_COLOR_INVALID);
	swap = r600_translate_colorswap(surf->base.format, false);
	endian = si_colorformat_endian_swap(format);

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	   8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}

	/* Remember 8-bit integer formats; the PS epilog clamps exports
	 * to them differently.
	 */
	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
	    (format == V_028C70_COLOR_8 ||
	     format == V_028C70_COLOR_8_8 ||
	     format == V_028C70_COLOR_8_8_8_8))
		surf->color_is_int8 = true;

	color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_SIMPLE_FLOAT(1) |
		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
				    ntype != V_028C70_NUMBER_SNORM &&
				    ntype != V_028C70_NUMBER_SRGB &&
				    format != V_028C70_COLOR_8_24 &&
				    format != V_028C70_COLOR_24_8) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);

	/* Intensity is implemented as Red, so treat it that way. */
	color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 ||
						  util_format_is_intensity(surf->base.format));

	if (rtex->resource.b.b.nr_samples > 1) {
		unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);

		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
				S_028C74_NUM_FRAGMENTS(log_samples);

		if (rtex->fmask.size) {
			color_info |= S_028C70_COMPRESSION(1);
			unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);

			if (sctx->b.chip_class == SI) {
				/* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
				color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
			}
		}
	}

	surf->cb_color_view = color_view;
	surf->cb_color_info = color_info;
	surf->cb_color_attrib = color_attrib;

	if (sctx->b.chip_class >= VI) {
		/* DCC control: smaller max uncompressed block sizes for
		 * small-bpe MSAA surfaces.
		 */
		unsigned max_uncompressed_block_size = 2;

		if (rtex->resource.b.b.nr_samples > 1) {
			if (rtex->surface.bpe == 1)
				max_uncompressed_block_size = 0;
			else if (rtex->surface.bpe == 2)
				max_uncompressed_block_size = 1;
		}

		surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
		                       S_028C78_INDEPENDENT_64B_BLOCKS(1);
	}

	/* This must be set for fast clear to work without FMASK. */
	if (!rtex->fmask.size && sctx->b.chip_class == SI) {
		unsigned bankh = util_logbase2(rtex->surface.bankh);
		surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
	}

	/* Determine pixel shader export format */
	si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);

	surf->color_initialized = true;
}
2185
/* One-time initialization of the DB register values (DB_Z_INFO,
 * DB_STENCIL_INFO, HTILE setup, depth/stencil base addresses) for a
 * depth/stencil surface. The results are cached in the r600_surface and
 * emitted later by si_emit_framebuffer_state().
 */
static void si_init_depth_surface(struct si_context *sctx,
				  struct r600_surface *surf)
{
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned level = surf->base.u.tex.level;
	struct radeon_surf_level *levelinfo = &rtex->surface.level[level];
	unsigned format;
	uint32_t z_info, s_info, db_depth_info;
	uint64_t z_offs, s_offs;
	uint32_t db_htile_data_base, db_htile_surface;

	format = si_translate_dbformat(rtex->db_render_format);

	if (format == V_028040_Z_INVALID) {
		R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
	}
	assert(format != V_028040_Z_INVALID);

	/* Depth and stencil planes live at different offsets within the
	 * same resource.
	 */
	s_offs = z_offs = rtex->resource.gpu_address;
	z_offs += rtex->surface.level[level].offset;
	s_offs += rtex->surface.stencil_level[level].offset;

	db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile);

	z_info = S_028040_FORMAT(format);
	if (rtex->resource.b.b.nr_samples > 1) {
		z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
	}

	if (rtex->surface.flags & RADEON_SURF_SBUFFER)
		s_info = S_028044_FORMAT(V_028044_STENCIL_8);
	else
		s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);

	if (sctx->b.chip_class >= CIK) {
		/* CIK+: tiling parameters come from the tile mode arrays. */
		struct radeon_info *info = &sctx->screen->b.info;
		unsigned index = rtex->surface.tiling_index[level];
		unsigned stencil_index = rtex->surface.stencil_tiling_index[level];
		unsigned macro_index = rtex->surface.macro_tile_index;
		unsigned tile_mode = info->si_tile_mode_array[index];
		unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
		unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

		db_depth_info |=
			S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
			S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
			S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
			S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
			S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
			S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
		z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
		s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
	} else {
		/* SI: only a tile mode index is programmed. */
		unsigned tile_mode_index = si_tile_mode_index(rtex, level, false);
		z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
		tile_mode_index = si_tile_mode_index(rtex, level, true);
		s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
	}

	/* HiZ aka depth buffer htile */
	/* use htile only for first level */
	if (rtex->htile_buffer && !level) {
		z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
			  S_028040_ALLOW_EXPCLEAR(1);

		if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
			/* Workaround: For a not yet understood reason, the
			 * combination of MSAA, fast stencil clear and stencil
			 * decompress messes with subsequent stencil buffer
			 * uses. Problem was reproduced on Verde, Bonaire,
			 * Tonga, and Carrizo.
			 *
			 * Disabling EXPCLEAR works around the problem.
			 *
			 * Check piglit's arb_texture_multisample-stencil-clear
			 * test if you want to try changing this.
			 */
			if (rtex->resource.b.b.nr_samples <= 1)
				s_info |= S_028044_ALLOW_EXPCLEAR(1);
		} else if (!rtex->tc_compatible_htile) {
			/* Use all of the htile_buffer for depth if there's no stencil.
			 * This must not be set when TC-compatible HTILE is enabled
			 * due to a hw bug.
			 */
			s_info |= S_028044_TILE_STENCIL_DISABLE(1);
		}

		uint64_t va = rtex->htile_buffer->gpu_address;
		db_htile_data_base = va >> 8;
		db_htile_surface = S_028ABC_FULL_CACHE(1);

		if (rtex->tc_compatible_htile) {
			db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);

			/* Fewer ZPLANES for higher sample counts. */
			switch (rtex->resource.b.b.nr_samples) {
			case 0:
			case 1:
				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
				break;
			case 2:
			case 4:
				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
				break;
			case 8:
				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
				break;
			default:
				assert(0);
			}
		}
	} else {
		db_htile_data_base = 0;
		db_htile_surface = 0;
	}

	assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);

	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
			      S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
	surf->db_htile_data_base = db_htile_data_base;
	surf->db_depth_info = db_depth_info;
	surf->db_z_info = z_info;
	surf->db_stencil_info = s_info;
	surf->db_depth_base = z_offs >> 8;
	surf->db_stencil_base = s_offs >> 8;
	surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
			      S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
	surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
							levelinfo->nblk_y) / 64 - 1);
	surf->db_htile_surface = db_htile_surface;

	surf->depth_initialized = true;
}
2319
2320static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
2321{
2322	for (int i = 0; i < state->nr_cbufs; ++i) {
2323		struct r600_surface *surf = NULL;
2324		struct r600_texture *rtex;
2325
2326		if (!state->cbufs[i])
2327			continue;
2328		surf = (struct r600_surface*)state->cbufs[i];
2329		rtex = (struct r600_texture*)surf->base.texture;
2330
2331		p_atomic_dec(&rtex->framebuffers_bound);
2332	}
2333}
2334
/* pipe_context::set_framebuffer_state implementation.
 *
 * Replaces the bound framebuffer, (re)initializes any surfaces bound for
 * the first time, recomputes the derived per-framebuffer state (SPI export
 * formats, compressed CB mask, sample count) and dirties the atoms that
 * depend on it.
 */
static void si_set_framebuffer_state(struct pipe_context *ctx,
				     const struct pipe_framebuffer_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_constant_buffer constbuf = {0};
	struct r600_surface *surf = NULL;
	struct r600_texture *rtex;
	bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
	int i;

	/* Stop separate-DCC statistics queries on the outgoing colorbuffers;
	 * they are restarted below for the incoming ones.
	 */
	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
		if (!sctx->framebuffer.state.cbufs[i])
			continue;

		rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
		if (rtex->dcc_gather_statistics)
			vi_separate_dcc_stop_query(ctx, rtex);
	}

	/* Only flush TC when changing the framebuffer state, because
	 * the only client not using TC that can change textures is
	 * the framebuffer.
	 *
	 * Flush all CB and DB caches here because all buffers can be used
	 * for write by both TC (with shader image stores) and CB/DB.
	 */
	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
			 SI_CONTEXT_INV_GLOBAL_L2 |
			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
			 SI_CONTEXT_CS_PARTIAL_FLUSH;

	/* Take the maximum of the old and new count. If the new count is lower,
	 * dirtying is needed to disable the unbound colorbuffers.
	 */
	sctx->framebuffer.dirty_cbufs |=
		(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
	sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;

	si_dec_framebuffer_counters(&sctx->framebuffer.state);
	util_copy_framebuffer_state(&sctx->framebuffer.state, state);

	/* Reset the derived state; it is rebuilt from the new cbufs below. */
	sctx->framebuffer.colorbuf_enabled_4bit = 0;
	sctx->framebuffer.spi_shader_col_format = 0;
	sctx->framebuffer.spi_shader_col_format_alpha = 0;
	sctx->framebuffer.spi_shader_col_format_blend = 0;
	sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
	sctx->framebuffer.color_is_int8 = 0;

	sctx->framebuffer.compressed_cb_mask = 0;
	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
	sctx->framebuffer.any_dst_linear = false;

	for (i = 0; i < state->nr_cbufs; i++) {
		if (!state->cbufs[i])
			continue;

		surf = (struct r600_surface*)state->cbufs[i];
		rtex = (struct r600_texture*)surf->base.texture;

		/* CB registers are computed lazily on first bind. */
		if (!surf->color_initialized) {
			si_initialize_color_surface(sctx, surf);
		}

		sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4);
		sctx->framebuffer.spi_shader_col_format |=
			surf->spi_shader_col_format << (i * 4);
		sctx->framebuffer.spi_shader_col_format_alpha |=
			surf->spi_shader_col_format_alpha << (i * 4);
		sctx->framebuffer.spi_shader_col_format_blend |=
			surf->spi_shader_col_format_blend << (i * 4);
		sctx->framebuffer.spi_shader_col_format_blend_alpha |=
			surf->spi_shader_col_format_blend_alpha << (i * 4);

		if (surf->color_is_int8)
			sctx->framebuffer.color_is_int8 |= 1 << i;

		if (rtex->fmask.size) {
			sctx->framebuffer.compressed_cb_mask |= 1 << i;
		}

		if (rtex->surface.is_linear)
			sctx->framebuffer.any_dst_linear = true;

		r600_context_add_resource_size(ctx, surf->base.texture);

		p_atomic_inc(&rtex->framebuffers_bound);

		if (rtex->dcc_gather_statistics) {
			/* Dirty tracking must be enabled for DCC usage analysis. */
			sctx->framebuffer.compressed_cb_mask |= 1 << i;
			vi_separate_dcc_start_query(ctx, rtex);
		}
	}

	if (state->zsbuf) {
		surf = (struct r600_surface*)state->zsbuf;
		rtex = (struct r600_texture*)surf->base.texture;

		/* DB registers are computed lazily on first bind. */
		if (!surf->depth_initialized) {
			si_init_depth_surface(sctx, surf);
		}
		r600_context_add_resource_size(ctx, surf->base.texture);
	}

	si_update_poly_offset_state(sctx);
	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
	si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);

	/* MSAA config depends on any_dst_linear (see si_emit_msaa_config). */
	if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
		si_mark_atom_dirty(sctx, &sctx->msaa_config);

	if (sctx->framebuffer.nr_samples != old_nr_samples) {
		si_mark_atom_dirty(sctx, &sctx->msaa_config);
		si_mark_atom_dirty(sctx, &sctx->db_render_state);

		/* Set sample locations as fragment shader constants. */
		switch (sctx->framebuffer.nr_samples) {
		case 1:
			constbuf.user_buffer = sctx->b.sample_locations_1x;
			break;
		case 2:
			constbuf.user_buffer = sctx->b.sample_locations_2x;
			break;
		case 4:
			constbuf.user_buffer = sctx->b.sample_locations_4x;
			break;
		case 8:
			constbuf.user_buffer = sctx->b.sample_locations_8x;
			break;
		case 16:
			constbuf.user_buffer = sctx->b.sample_locations_16x;
			break;
		default:
			R600_ERR("Requested an invalid number of samples %i.\n",
				 sctx->framebuffer.nr_samples);
			assert(0);
		}
		/* 2 floats (x, y) per sample. */
		constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
		si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);

		si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
	}

	sctx->need_check_render_feedback = true;
	sctx->do_update_shaders = true;
}
2483
/* Emit the framebuffer atom: write the CB/DB register state for all dirty
 * colorbuffers and the dirty Z/S buffer into the command stream, and add
 * the backing buffers to the CS buffer list.
 */
static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
	unsigned i, nr_cbufs = state->nr_cbufs;
	struct r600_texture *tex = NULL;
	struct r600_surface *cb = NULL;
	unsigned cb_color_info = 0;

	/* Colorbuffers. */
	for (i = 0; i < nr_cbufs; i++) {
		const struct radeon_surf_level *level_info;
		unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
		unsigned cb_color_base, cb_color_fmask, cb_color_attrib;
		unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice;

		if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
			continue;

		cb = (struct r600_surface*)state->cbufs[i];
		if (!cb) {
			/* Unbound slot: write an invalid format to disable it. */
			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
			continue;
		}

		tex = (struct r600_texture *)cb->base.texture;
		level_info =  &tex->surface.level[cb->base.u.tex.level];
		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
				      &tex->resource, RADEON_USAGE_READWRITE,
				      tex->resource.b.b.nr_samples > 1 ?
					      RADEON_PRIO_COLOR_BUFFER_MSAA :
					      RADEON_PRIO_COLOR_BUFFER);

		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
				tex->cmask_buffer, RADEON_USAGE_READWRITE,
				RADEON_PRIO_CMASK);
		}

		if (tex->dcc_separate_buffer)
			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
						  tex->dcc_separate_buffer,
						  RADEON_USAGE_READWRITE,
						  RADEON_PRIO_DCC);

		/* Compute mutable surface parameters. */
		pitch_tile_max = level_info->nblk_x / 8 - 1;
		slice_tile_max = level_info->nblk_x *
				 level_info->nblk_y / 64 - 1;
		tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false);

		cb_color_base = (tex->resource.gpu_address + level_info->offset) >> 8;
		cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
		cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
		cb_color_attrib = cb->cb_color_attrib |
				  S_028C74_TILE_MODE_INDEX(tile_mode_index);

		if (tex->fmask.size) {
			if (sctx->b.chip_class >= CIK)
				cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->fmask.pitch_in_pixels / 8 - 1);
			cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->fmask.tile_mode_index);
			cb_color_fmask = (tex->resource.gpu_address + tex->fmask.offset) >> 8;
			cb_color_fmask_slice = S_028C88_TILE_MAX(tex->fmask.slice_tile_max);
		} else {
			/* This must be set for fast clear to work without FMASK. */
			if (sctx->b.chip_class >= CIK)
				cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
			cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
			cb_color_fmask = cb_color_base;
			cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
		}

		cb_color_info = cb->cb_color_info | tex->cb_color_info;

		if (tex->dcc_offset && cb->base.u.tex.level < tex->surface.num_dcc_levels) {
			/* Don't enable DCC for the destination of an MSAA
			 * resolve blit (cbuf0 = src, cbuf1 = dst by convention
			 * of the resolve path).
			 */
			bool is_msaa_resolve_dst = state->cbufs[0] &&
						   state->cbufs[0]->texture->nr_samples > 1 &&
						   state->cbufs[1] == &cb->base &&
						   state->cbufs[1]->texture->nr_samples <= 1;

			if (!is_msaa_resolve_dst)
				cb_color_info |= S_028C70_DCC_ENABLE(1);
		}

		/* 13 consecutive CB registers, plus DCC_BASE on VI+. */
		radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
					   sctx->b.chip_class >= VI ? 14 : 13);
		radeon_emit(cs, cb_color_base);		/* R_028C60_CB_COLOR0_BASE */
		radeon_emit(cs, cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		radeon_emit(cs, cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		radeon_emit(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		radeon_emit(cs, cb_color_info);		/* R_028C70_CB_COLOR0_INFO */
		radeon_emit(cs, cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		radeon_emit(cs, cb->cb_dcc_control);	/* R_028C78_CB_COLOR0_DCC_CONTROL */
		radeon_emit(cs, tex->cmask.base_address_reg);	/* R_028C7C_CB_COLOR0_CMASK */
		radeon_emit(cs, tex->cmask.slice_tile_max);	/* R_028C80_CB_COLOR0_CMASK_SLICE */
		radeon_emit(cs, cb_color_fmask);		/* R_028C84_CB_COLOR0_FMASK */
		radeon_emit(cs, cb_color_fmask_slice);		/* R_028C88_CB_COLOR0_FMASK_SLICE */
		radeon_emit(cs, tex->color_clear_value[0]);	/* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
		radeon_emit(cs, tex->color_clear_value[1]);	/* R_028C90_CB_COLOR0_CLEAR_WORD1 */

		if (sctx->b.chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */
			radeon_emit(cs, ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
					 tex->dcc_offset +
				         tex->surface.level[cb->base.u.tex.level].dcc_offset) >> 8);
	}
	/* Disable the remaining (never-dirtied-above) colorbuffer slots. */
	for (; i < 8 ; i++)
		if (sctx->framebuffer.dirty_cbufs & (1 << i))
			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);

	/* ZS buffer. */
	if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;

		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
				      &rtex->resource, RADEON_USAGE_READWRITE,
				      zb->base.texture->nr_samples > 1 ?
					      RADEON_PRIO_DEPTH_BUFFER_MSAA :
					      RADEON_PRIO_DEPTH_BUFFER);

		if (zb->db_htile_data_base) {
			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
					      rtex->htile_buffer, RADEON_USAGE_READWRITE,
					      RADEON_PRIO_HTILE);
		}

		radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
		radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);

		radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
		radeon_emit(cs, zb->db_depth_info);	/* R_02803C_DB_DEPTH_INFO */
		radeon_emit(cs, zb->db_z_info |		/* R_028040_DB_Z_INFO */
			    S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
		radeon_emit(cs, zb->db_stencil_info);	/* R_028044_DB_STENCIL_INFO */
		radeon_emit(cs, zb->db_depth_base);	/* R_028048_DB_Z_READ_BASE */
		radeon_emit(cs, zb->db_stencil_base);	/* R_02804C_DB_STENCIL_READ_BASE */
		radeon_emit(cs, zb->db_depth_base);	/* R_028050_DB_Z_WRITE_BASE */
		radeon_emit(cs, zb->db_stencil_base);	/* R_028054_DB_STENCIL_WRITE_BASE */
		radeon_emit(cs, zb->db_depth_size);	/* R_028058_DB_DEPTH_SIZE */
		radeon_emit(cs, zb->db_depth_slice);	/* R_02805C_DB_DEPTH_SLICE */

		radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
		radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */
		radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */

		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
	} else if (sctx->framebuffer.dirty_zsbuf) {
		/* No ZS buffer bound: write invalid formats to disable DB. */
		radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
	}

	/* Framebuffer dimensions. */
        /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
	radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));

	sctx->framebuffer.dirty_cbufs = 0;
	sctx->framebuffer.dirty_zsbuf = false;
}
2645
/* Emit MSAA sample locations and, on Polaris, the small primitive filter
 * state. Sample locations are only re-emitted when the effective sample
 * count changes (tracked in sctx->msaa_sample_locs.nr_samples).
 */
static void si_emit_msaa_sample_locs(struct si_context *sctx,
				     struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	unsigned nr_samples = sctx->framebuffer.nr_samples;

	/* Smoothing (only possible with nr_samples == 1) uses the same
	 * sample locations as the MSAA it simulates.
	 */
	if (nr_samples <= 1 && sctx->smoothing_enabled)
		nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;

	/* On Polaris, the small primitive filter uses the sample locations
	 * even when MSAA is off, so we need to make sure they're set to 0.
	 */
	if (sctx->b.family >= CHIP_POLARIS10)
		nr_samples = MAX2(nr_samples, 1);

	/* Skip the register writes when the sample count is unchanged. */
	if (nr_samples >= 1 &&
	    (nr_samples != sctx->msaa_sample_locs.nr_samples)) {
		sctx->msaa_sample_locs.nr_samples = nr_samples;
		cayman_emit_msaa_sample_locs(cs, nr_samples);
	}

	if (sctx->b.family >= CHIP_POLARIS10) {
		struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
		unsigned small_prim_filter_cntl =
			S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
			S_028830_LINE_FILTER_DISABLE(1); /* line bug */

		/* The alternative of setting sample locations to 0 would
		 * require a DB flush to avoid Z errors, see
		 * https://bugs.freedesktop.org/show_bug.cgi?id=96908
		 */
		if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable)
			small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;

		radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
				       small_prim_filter_cntl);
	}
}
2687
/* Emit the MSAA configuration registers. The PA_SC_MODE_CNTL_1 value is
 * built here and passed to the shared Cayman-family emit helper together
 * with the current sample counts.
 */
static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	unsigned num_tile_pipes = sctx->screen->b.info.num_tile_pipes;
	/* 33% faster rendering to linear color buffers */
	bool dst_is_linear = sctx->framebuffer.any_dst_linear;
	unsigned sc_mode_cntl_1 =
		S_028A4C_WALK_SIZE(dst_is_linear) |
		S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) |
		S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
		/* always 1: */
		S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
		S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
		S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
		S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
		S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
		S_028A4C_FORCE_EOV_REZ_ENABLE(1);

	/* Smoothing overrides the PS iteration count with the simulated
	 * MSAA sample count. */
	cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
				sctx->ps_iter_samples,
				sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0,
				sc_mode_cntl_1);
}
2711
2712static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
2713{
2714	struct si_context *sctx = (struct si_context *)ctx;
2715
2716	if (sctx->ps_iter_samples == min_samples)
2717		return;
2718
2719	sctx->ps_iter_samples = min_samples;
2720	sctx->do_update_shaders = true;
2721
2722	if (sctx->framebuffer.nr_samples > 1)
2723		si_mark_atom_dirty(sctx, &sctx->msaa_config);
2724}
2725
2726/*
2727 * Samplers
2728 */
2729
2730/**
2731 * Build the sampler view descriptor for a buffer texture.
2732 * @param state 256-bit descriptor; only the high 128 bits are filled in
2733 */
2734void
2735si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
2736			  enum pipe_format format,
2737			  unsigned offset, unsigned size,
2738			  uint32_t *state)
2739{
2740	const struct util_format_description *desc;
2741	int first_non_void;
2742	unsigned stride;
2743	unsigned num_records;
2744	unsigned num_format, data_format;
2745
2746	desc = util_format_description(format);
2747	first_non_void = util_format_get_first_non_void_channel(format);
2748	stride = desc->block.bits / 8;
2749	num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void);
2750	data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void);
2751
2752	num_records = size / stride;
2753	num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride);
2754
2755	if (screen->b.chip_class >= VI)
2756		num_records *= stride;
2757
2758	state[4] = 0;
2759	state[5] = S_008F04_STRIDE(stride);
2760	state[6] = num_records;
2761	state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
2762		   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
2763		   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
2764		   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
2765		   S_008F0C_NUM_FORMAT(num_format) |
2766		   S_008F0C_DATA_FORMAT(data_format);
2767}
2768
2769/**
2770 * Build the sampler view descriptor for a texture.
2771 */
2772void
2773si_make_texture_descriptor(struct si_screen *screen,
2774			   struct r600_texture *tex,
2775			   bool sampler,
2776			   enum pipe_texture_target target,
2777			   enum pipe_format pipe_format,
2778			   const unsigned char state_swizzle[4],
2779			   unsigned first_level, unsigned last_level,
2780			   unsigned first_layer, unsigned last_layer,
2781			   unsigned width, unsigned height, unsigned depth,
2782			   uint32_t *state,
2783			   uint32_t *fmask_state)
2784{
2785	struct pipe_resource *res = &tex->resource.b.b;
2786	const struct util_format_description *desc;
2787	unsigned char swizzle[4];
2788	int first_non_void;
2789	unsigned num_format, data_format, type;
2790	uint64_t va;
2791
2792	desc = util_format_description(pipe_format);
2793
2794	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
2795		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
2796		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
2797
2798		switch (pipe_format) {
2799		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2800		case PIPE_FORMAT_X24S8_UINT:
2801		case PIPE_FORMAT_X32_S8X24_UINT:
2802		case PIPE_FORMAT_X8Z24_UNORM:
2803			util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
2804			break;
2805		default:
2806			util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
2807		}
2808	} else {
2809		util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
2810	}
2811
2812	first_non_void = util_format_get_first_non_void_channel(pipe_format);
2813
2814	switch (pipe_format) {
2815	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2816		num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2817		break;
2818	default:
2819		if (first_non_void < 0) {
2820			if (util_format_is_compressed(pipe_format)) {
2821				switch (pipe_format) {
2822				case PIPE_FORMAT_DXT1_SRGB:
2823				case PIPE_FORMAT_DXT1_SRGBA:
2824				case PIPE_FORMAT_DXT3_SRGBA:
2825				case PIPE_FORMAT_DXT5_SRGBA:
2826				case PIPE_FORMAT_BPTC_SRGBA:
2827				case PIPE_FORMAT_ETC2_SRGB8:
2828				case PIPE_FORMAT_ETC2_SRGB8A1:
2829				case PIPE_FORMAT_ETC2_SRGBA8:
2830					num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2831					break;
2832				case PIPE_FORMAT_RGTC1_SNORM:
2833				case PIPE_FORMAT_LATC1_SNORM:
2834				case PIPE_FORMAT_RGTC2_SNORM:
2835				case PIPE_FORMAT_LATC2_SNORM:
2836				case PIPE_FORMAT_ETC2_R11_SNORM:
2837				case PIPE_FORMAT_ETC2_RG11_SNORM:
2838				/* implies float, so use SNORM/UNORM to determine
2839				   whether data is signed or not */
2840				case PIPE_FORMAT_BPTC_RGB_FLOAT:
2841					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2842					break;
2843				default:
2844					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2845					break;
2846				}
2847			} else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
2848				num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2849			} else {
2850				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2851			}
2852		} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
2853			num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2854		} else {
2855			num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2856
2857			switch (desc->channel[first_non_void].type) {
2858			case UTIL_FORMAT_TYPE_FLOAT:
2859				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2860				break;
2861			case UTIL_FORMAT_TYPE_SIGNED:
2862				if (desc->channel[first_non_void].normalized)
2863					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2864				else if (desc->channel[first_non_void].pure_integer)
2865					num_format = V_008F14_IMG_NUM_FORMAT_SINT;
2866				else
2867					num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
2868				break;
2869			case UTIL_FORMAT_TYPE_UNSIGNED:
2870				if (desc->channel[first_non_void].normalized)
2871					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2872				else if (desc->channel[first_non_void].pure_integer)
2873					num_format = V_008F14_IMG_NUM_FORMAT_UINT;
2874				else
2875					num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
2876			}
2877		}
2878	}
2879
2880	data_format = si_translate_texformat(&screen->b.b, pipe_format, desc, first_non_void);
2881	if (data_format == ~0) {
2882		data_format = 0;
2883	}
2884
2885	if (!sampler &&
2886	    (res->target == PIPE_TEXTURE_CUBE ||
2887	     res->target == PIPE_TEXTURE_CUBE_ARRAY ||
2888	     res->target == PIPE_TEXTURE_3D)) {
2889		/* For the purpose of shader images, treat cube maps and 3D
2890		 * textures as 2D arrays. For 3D textures, the address
2891		 * calculations for mipmaps are different, so we rely on the
2892		 * caller to effectively disable mipmaps.
2893		 */
2894		type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
2895
2896		assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0));
2897	} else {
2898		type = si_tex_dim(res->target, target, res->nr_samples);
2899	}
2900
2901	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
2902	        height = 1;
2903		depth = res->array_size;
2904	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
2905		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
2906		if (sampler || res->target != PIPE_TEXTURE_3D)
2907			depth = res->array_size;
2908	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
2909		depth = res->array_size / 6;
2910
2911	state[0] = 0;
2912	state[1] = (S_008F14_DATA_FORMAT(data_format) |
2913		    S_008F14_NUM_FORMAT(num_format));
2914	state[2] = (S_008F18_WIDTH(width - 1) |
2915		    S_008F18_HEIGHT(height - 1) |
2916		    S_008F18_PERF_MOD(4));
2917	state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
2918		    S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
2919		    S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
2920		    S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
2921		    S_008F1C_BASE_LEVEL(res->nr_samples > 1 ?
2922					0 : first_level) |
2923		    S_008F1C_LAST_LEVEL(res->nr_samples > 1 ?
2924					util_logbase2(res->nr_samples) :
2925					last_level) |
2926		    S_008F1C_POW2_PAD(res->last_level > 0) |
2927		    S_008F1C_TYPE(type));
2928	state[4] = S_008F20_DEPTH(depth - 1);
2929	state[5] = (S_008F24_BASE_ARRAY(first_layer) |
2930		    S_008F24_LAST_ARRAY(last_layer));
2931	state[6] = 0;
2932	state[7] = 0;
2933
2934	if (tex->dcc_offset) {
2935		unsigned swap = r600_translate_colorswap(pipe_format, false);
2936
2937		state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
2938	} else {
2939		/* The last dword is unused by hw. The shader uses it to clear
2940		 * bits in the first dword of sampler state.
2941		 */
2942		if (screen->b.chip_class <= CIK && res->nr_samples <= 1) {
2943			if (first_level == last_level)
2944				state[7] = C_008F30_MAX_ANISO_RATIO;
2945			else
2946				state[7] = 0xffffffff;
2947		}
2948	}
2949
2950	/* Initialize the sampler view for FMASK. */
2951	if (tex->fmask.size) {
2952		uint32_t fmask_format;
2953
2954		va = tex->resource.gpu_address + tex->fmask.offset;
2955
2956		switch (res->nr_samples) {
2957		case 2:
2958			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
2959			break;
2960		case 4:
2961			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
2962			break;
2963		case 8:
2964			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
2965			break;
2966		default:
2967			assert(0);
2968			fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
2969		}
2970
2971		fmask_state[0] = va >> 8;
2972		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
2973				 S_008F14_DATA_FORMAT(fmask_format) |
2974				 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
2975		fmask_state[2] = S_008F18_WIDTH(width - 1) |
2976				 S_008F18_HEIGHT(height - 1);
2977		fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
2978				 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
2979				 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
2980				 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
2981				 S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index) |
2982				 S_008F1C_TYPE(si_tex_dim(res->target, target, 0));
2983		fmask_state[4] = S_008F20_DEPTH(depth - 1) |
2984				 S_008F20_PITCH(tex->fmask.pitch_in_pixels - 1);
2985		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) |
2986				 S_008F24_LAST_ARRAY(last_layer);
2987		fmask_state[6] = 0;
2988		fmask_state[7] = 0;
2989	}
2990}
2991
2992/**
2993 * Create a sampler view.
2994 *
2995 * @param ctx		context
2996 * @param texture	texture
2997 * @param state		sampler view template
2998 * @param width0	width0 override (for compressed textures as int)
2999 * @param height0	height0 override (for compressed textures as int)
3000 * @param force_level   set the base address to the level (for compressed textures)
3001 */
3002struct pipe_sampler_view *
3003si_create_sampler_view_custom(struct pipe_context *ctx,
3004			      struct pipe_resource *texture,
3005			      const struct pipe_sampler_view *state,
3006			      unsigned width0, unsigned height0,
3007			      unsigned force_level)
3008{
3009	struct si_context *sctx = (struct si_context*)ctx;
3010	struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
3011	struct r600_texture *tmp = (struct r600_texture*)texture;
3012	unsigned base_level, first_level, last_level;
3013	unsigned char state_swizzle[4];
3014	unsigned height, depth, width;
3015	unsigned last_layer = state->u.tex.last_layer;
3016	enum pipe_format pipe_format;
3017	const struct radeon_surf_level *surflevel;
3018
3019	if (!view)
3020		return NULL;
3021
3022	/* initialize base object */
3023	view->base = *state;
3024	view->base.texture = NULL;
3025	view->base.reference.count = 1;
3026	view->base.context = ctx;
3027
3028	assert(texture);
3029	pipe_resource_reference(&view->base.texture, texture);
3030
3031	if (state->format == PIPE_FORMAT_X24S8_UINT ||
3032	    state->format == PIPE_FORMAT_S8X24_UINT ||
3033	    state->format == PIPE_FORMAT_X32_S8X24_UINT ||
3034	    state->format == PIPE_FORMAT_S8_UINT)
3035		view->is_stencil_sampler = true;
3036
3037	/* Buffer resource. */
3038	if (texture->target == PIPE_BUFFER) {
3039		si_make_buffer_descriptor(sctx->screen,
3040					  (struct r600_resource *)texture,
3041					  state->format,
3042					  state->u.buf.offset,
3043					  state->u.buf.size,
3044					  view->state);
3045		return &view->base;
3046	}
3047
3048	state_swizzle[0] = state->swizzle_r;
3049	state_swizzle[1] = state->swizzle_g;
3050	state_swizzle[2] = state->swizzle_b;
3051	state_swizzle[3] = state->swizzle_a;
3052
3053	base_level = 0;
3054	first_level = state->u.tex.first_level;
3055	last_level = state->u.tex.last_level;
3056	width = width0;
3057	height = height0;
3058	depth = texture->depth0;
3059
3060	if (force_level) {
3061		assert(force_level == first_level &&
3062		       force_level == last_level);
3063		base_level = force_level;
3064		first_level = 0;
3065		last_level = 0;
3066		width = u_minify(width, force_level);
3067		height = u_minify(height, force_level);
3068		depth = u_minify(depth, force_level);
3069	}
3070
3071	/* This is not needed if state trackers set last_layer correctly. */
3072	if (state->target == PIPE_TEXTURE_1D ||
3073	    state->target == PIPE_TEXTURE_2D ||
3074	    state->target == PIPE_TEXTURE_RECT ||
3075	    state->target == PIPE_TEXTURE_CUBE)
3076		last_layer = state->u.tex.first_layer;
3077
3078	/* Texturing with separate depth and stencil. */
3079	pipe_format = state->format;
3080
3081	/* Depth/stencil texturing sometimes needs separate texture. */
3082	if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) {
3083		if (!tmp->flushed_depth_texture &&
3084		    !r600_init_flushed_depth_texture(ctx, texture, NULL)) {
3085			pipe_resource_reference(&view->base.texture, NULL);
3086			FREE(view);
3087			return NULL;
3088		}
3089
3090		assert(tmp->flushed_depth_texture);
3091
3092		/* Override format for the case where the flushed texture
3093		 * contains only Z or only S.
3094		 */
3095		if (tmp->flushed_depth_texture->resource.b.b.format != tmp->resource.b.b.format)
3096			pipe_format = tmp->flushed_depth_texture->resource.b.b.format;
3097
3098		tmp = tmp->flushed_depth_texture;
3099	}
3100
3101	surflevel = tmp->surface.level;
3102
3103	if (tmp->db_compatible) {
3104		if (!view->is_stencil_sampler)
3105			pipe_format = tmp->db_render_format;
3106
3107		switch (pipe_format) {
3108		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
3109			pipe_format = PIPE_FORMAT_Z32_FLOAT;
3110			break;
3111		case PIPE_FORMAT_X8Z24_UNORM:
3112		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
3113			/* Z24 is always stored like this for DB
3114			 * compatibility.
3115			 */
3116			pipe_format = PIPE_FORMAT_Z24X8_UNORM;
3117			break;
3118		case PIPE_FORMAT_X24S8_UINT:
3119		case PIPE_FORMAT_S8X24_UINT:
3120		case PIPE_FORMAT_X32_S8X24_UINT:
3121			pipe_format = PIPE_FORMAT_S8_UINT;
3122			surflevel = tmp->surface.stencil_level;
3123			break;
3124		default:;
3125		}
3126	}
3127
3128	vi_dcc_disable_if_incompatible_format(&sctx->b, texture,
3129					      state->u.tex.first_level,
3130					      state->format);
3131
3132	si_make_texture_descriptor(sctx->screen, tmp, true,
3133				   state->target, pipe_format, state_swizzle,
3134				   first_level, last_level,
3135				   state->u.tex.first_layer, last_layer,
3136				   width, height, depth,
3137				   view->state, view->fmask_state);
3138
3139	view->base_level_info = &surflevel[base_level];
3140	view->base_level = base_level;
3141	view->block_width = util_format_get_blockwidth(pipe_format);
3142	return &view->base;
3143}
3144
3145static struct pipe_sampler_view *
3146si_create_sampler_view(struct pipe_context *ctx,
3147		       struct pipe_resource *texture,
3148		       const struct pipe_sampler_view *state)
3149{
3150	return si_create_sampler_view_custom(ctx, texture, state,
3151					     texture ? texture->width0 : 0,
3152					     texture ? texture->height0 : 0, 0);
3153}
3154
3155static void si_sampler_view_destroy(struct pipe_context *ctx,
3156				    struct pipe_sampler_view *state)
3157{
3158	struct si_sampler_view *view = (struct si_sampler_view *)state;
3159
3160	pipe_resource_reference(&state->texture, NULL);
3161	FREE(view);
3162}
3163
3164static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
3165{
3166	return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
3167	       wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
3168	       (linear_filter &&
3169	        (wrap == PIPE_TEX_WRAP_CLAMP ||
3170		 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
3171}
3172
3173static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
3174{
3175	bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
3176			     state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
3177
3178	return (state->border_color.ui[0] || state->border_color.ui[1] ||
3179		state->border_color.ui[2] || state->border_color.ui[3]) &&
3180	       (wrap_mode_uses_border_color(state->wrap_s, linear_filter) ||
3181		wrap_mode_uses_border_color(state->wrap_t, linear_filter) ||
3182		wrap_mode_uses_border_color(state->wrap_r, linear_filter));
3183}
3184
/* Create a sampler state object.
 *
 * Classifies the border color as one of the built-in types (transparent
 * black, opaque black, opaque white) or uploads it to the per-context
 * border color table, then packs the four SQ sampler words.
 * Returns NULL on allocation failure.
 */
static void *si_create_sampler_state(struct pipe_context *ctx,
				     const struct pipe_sampler_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct r600_common_screen *rscreen = sctx->b.screen;
	struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
	unsigned border_color_type, border_color_index = 0;
	/* A non-negative force_aniso overrides the requested anisotropy
	 * (debug option). */
	unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso
						       : state->max_anisotropy;
	unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso);

	if (!rstate) {
		return NULL;
	}

	/* Prefer the built-in border color types; only fall back to the
	 * border color table for arbitrary colors. */
	if (!sampler_state_needs_border_color(state))
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
	else if (state->border_color.f[0] == 0 &&
		 state->border_color.f[1] == 0 &&
		 state->border_color.f[2] == 0 &&
		 state->border_color.f[3] == 0)
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
	else if (state->border_color.f[0] == 0 &&
		 state->border_color.f[1] == 0 &&
		 state->border_color.f[2] == 0 &&
		 state->border_color.f[3] == 1)
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
	else if (state->border_color.f[0] == 1 &&
		 state->border_color.f[1] == 1 &&
		 state->border_color.f[2] == 1 &&
		 state->border_color.f[3] == 1)
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
	else {
		int i;

		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;

		/* Check if the border has been uploaded already. */
		for (i = 0; i < sctx->border_color_count; i++)
			if (memcmp(&sctx->border_color_table[i], &state->border_color,
				   sizeof(state->border_color)) == 0)
				break;

		if (i >= SI_MAX_BORDER_COLORS) {
			/* Getting 4096 unique border colors is very unlikely. */
			fprintf(stderr, "radeonsi: The border color table is full. "
				"Any new border colors will be just black. "
				"Please file a bug.\n");
			border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
		} else {
			if (i == sctx->border_color_count) {
				/* Upload a new border color. */
				memcpy(&sctx->border_color_table[i], &state->border_color,
				       sizeof(state->border_color));
				util_memcpy_cpu_to_le32(&sctx->border_color_map[i],
							&state->border_color,
							sizeof(state->border_color));
				sctx->border_color_count++;
			}

			border_color_index = i;
		}
	}

#ifdef DEBUG
	rstate->magic = SI_SAMPLER_STATE_MAGIC;
#endif
	/* Pack the four SQ_TEX sampler dwords. */
	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
			  S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
			  S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
			  S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
			  S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
			  S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
			  S_008F30_ANISO_BIAS(max_aniso_ratio) |
			  S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
			  S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI));
	rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
			  S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) |
			  S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
	rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
			  S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) |
			  S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) |
			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
			  S_008F38_MIP_POINT_PRECLAMP(1) |
			  S_008F38_DISABLE_LSB_CEIL(1) |
			  S_008F38_FILTER_PREC_FIX(1) |
			  S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI));
	rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
			 S_008F3C_BORDER_COLOR_TYPE(border_color_type);
	return rstate;
}
3277
3278static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
3279{
3280	struct si_context *sctx = (struct si_context *)ctx;
3281
3282	if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask)
3283		return;
3284
3285	sctx->sample_mask.sample_mask = sample_mask;
3286	si_mark_atom_dirty(sctx, &sctx->sample_mask.atom);
3287}
3288
3289static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom)
3290{
3291	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
3292	unsigned mask = sctx->sample_mask.sample_mask;
3293
3294	/* Needed for line and polygon smoothing as well as for the Polaris
3295	 * small primitive filter. We expect the state tracker to take care of
3296	 * this for us.
3297	 */
3298	assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||
3299	       (mask & 1 && sctx->blitter->running));
3300
3301	radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
3302	radeon_emit(cs, mask | (mask << 16));
3303	radeon_emit(cs, mask | (mask << 16));
3304}
3305
/* Free a sampler state object. Debug builds verify the magic number to
 * catch double frees and stale pointers. */
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
{
#ifdef DEBUG
	struct si_sampler_state *rstate = state;

	assert(rstate->magic == SI_SAMPLER_STATE_MAGIC);
	rstate->magic = 0;
#endif
	free(state);
}
3316
3317/*
3318 * Vertex elements & buffers
3319 */
3320
/* Create a vertex element (vertex fetch layout) state object.
 *
 * For each element this precomputes the buffer resource descriptor word 3
 * (swizzle + formats), the element size, and per-element shader fetch
 * workarounds (fix_fetch/fix_size3) for formats the hardware cannot fetch
 * natively. Returns NULL on allocation failure or an out-of-range vertex
 * buffer index.
 */
static void *si_create_vertex_elements(struct pipe_context *ctx,
				       unsigned count,
				       const struct pipe_vertex_element *elements)
{
	struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
	int i;

	assert(count <= SI_MAX_ATTRIBS);
	if (!v)
		return NULL;

	v->count = count;
	for (i = 0; i < count; ++i) {
		const struct util_format_description *desc;
		const struct util_format_channel_description *channel;
		unsigned data_format, num_format;
		int first_non_void;
		unsigned vbo_index = elements[i].vertex_buffer_index;

		if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
			FREE(v);
			return NULL;
		}

		desc = util_format_description(elements[i].src_format);
		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
		channel = &desc->channel[first_non_void];

		v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
				   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
				   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
				   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
				   S_008F0C_NUM_FORMAT(num_format) |
				   S_008F0C_DATA_FORMAT(data_format);
		v->format_size[i] = desc->block.bits / 8;

		/* Each fix_fetch entry is a 4-bit code stored at (4 * i). */
		/* The hardware always treats the 2-bit alpha channel as
		 * unsigned, so a shader workaround is needed.
		 */
		if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) {
			if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SNORM << (4 * i);
			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SSCALED << (4 * i);
			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
				/* This isn't actually used in OpenGL. */
				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SINT << (4 * i);
			}
		} else if (channel->type == UTIL_FORMAT_TYPE_FIXED) {
			/* Fixed-point formats are converted in the shader;
			 * RGBX means alpha is forced to 1. */
			if (desc->swizzle[3] == PIPE_SWIZZLE_1)
				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_FIXED << (4 * i);
			else
				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_FIXED << (4 * i);
		} else if (channel->size == 32 && !channel->pure_integer) {
			/* 32-bit normalized/scaled formats need shader-side
			 * conversion as well. */
			if (channel->type == UTIL_FORMAT_TYPE_SIGNED) {
				if (channel->normalized) {
					if (desc->swizzle[3] == PIPE_SWIZZLE_1)
						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_SNORM << (4 * i);
					else
						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SNORM << (4 * i);
				} else {
					v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SSCALED << (4 * i);
				}
			} else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) {
				if (channel->normalized) {
					if (desc->swizzle[3] == PIPE_SWIZZLE_1)
						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_UNORM << (4 * i);
					else
						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_UNORM << (4 * i);
				} else {
					v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_USCALED << (4 * i);
				}
			}
		}

		/* We work around the fact that 8_8_8 and 16_16_16 data formats
		 * do not exist by using the corresponding 4-component formats.
		 * This requires a fixup of the descriptor for bounds checks.
		 */
		if (desc->block.bits == 3 * 8 ||
		    desc->block.bits == 3 * 16) {
			v->fix_size3 |= (desc->block.bits / 24) << (2 * i);
		}
	}
	memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);

	return v;
}
3411
3412static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
3413{
3414	struct si_context *sctx = (struct si_context *)ctx;
3415	struct si_vertex_element *v = (struct si_vertex_element*)state;
3416
3417	sctx->vertex_elements = v;
3418	sctx->vertex_buffers_dirty = true;
3419	sctx->do_update_shaders = true;
3420}
3421
3422static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
3423{
3424	struct si_context *sctx = (struct si_context *)ctx;
3425
3426	if (sctx->vertex_elements == state)
3427		sctx->vertex_elements = NULL;
3428	FREE(state);
3429}
3430
3431static void si_set_vertex_buffers(struct pipe_context *ctx,
3432				  unsigned start_slot, unsigned count,
3433				  const struct pipe_vertex_buffer *buffers)
3434{
3435	struct si_context *sctx = (struct si_context *)ctx;
3436	struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
3437	int i;
3438
3439	assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer));
3440
3441	if (buffers) {
3442		for (i = 0; i < count; i++) {
3443			const struct pipe_vertex_buffer *src = buffers + i;
3444			struct pipe_vertex_buffer *dsti = dst + i;
3445			struct pipe_resource *buf = src->buffer;
3446
3447			pipe_resource_reference(&dsti->buffer, buf);
3448			dsti->buffer_offset = src->buffer_offset;
3449			dsti->stride = src->stride;
3450			r600_context_add_resource_size(ctx, buf);
3451			if (buf)
3452				r600_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER;
3453		}
3454	} else {
3455		for (i = 0; i < count; i++) {
3456			pipe_resource_reference(&dst[i].buffer, NULL);
3457		}
3458	}
3459	sctx->vertex_buffers_dirty = true;
3460}
3461
3462static void si_set_index_buffer(struct pipe_context *ctx,
3463				const struct pipe_index_buffer *ib)
3464{
3465	struct si_context *sctx = (struct si_context *)ctx;
3466
3467	if (ib) {
3468		struct pipe_resource *buf = ib->buffer;
3469
3470		pipe_resource_reference(&sctx->index_buffer.buffer, buf);
3471	        memcpy(&sctx->index_buffer, ib, sizeof(*ib));
3472		r600_context_add_resource_size(ctx, buf);
3473		if (buf)
3474			r600_resource(buf)->bind_history |= PIPE_BIND_INDEX_BUFFER;
3475	} else {
3476		pipe_resource_reference(&sctx->index_buffer.buffer, NULL);
3477	}
3478}
3479
3480/*
3481 * Misc
3482 */
3483
3484static void si_set_tess_state(struct pipe_context *ctx,
3485			      const float default_outer_level[4],
3486			      const float default_inner_level[2])
3487{
3488	struct si_context *sctx = (struct si_context *)ctx;
3489	struct pipe_constant_buffer cb;
3490	float array[8];
3491
3492	memcpy(array, default_outer_level, sizeof(float) * 4);
3493	memcpy(array+4, default_inner_level, sizeof(float) * 2);
3494
3495	cb.buffer = NULL;
3496	cb.user_buffer = NULL;
3497	cb.buffer_size = sizeof(array);
3498
3499	si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer,
3500			       (void*)array, sizeof(array),
3501			       &cb.buffer_offset);
3502
3503	si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
3504	pipe_resource_reference(&cb.buffer, NULL);
3505}
3506
3507static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
3508{
3509	struct si_context *sctx = (struct si_context *)ctx;
3510
3511	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
3512			 SI_CONTEXT_INV_GLOBAL_L2 |
3513			 SI_CONTEXT_FLUSH_AND_INV_CB;
3514}
3515
/* This only ensures coherency for shader image/buffer stores. */
static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
{
	struct si_context *sctx = (struct si_context *)ctx;

	/* Subsequent commands must wait for all shader invocations to
	 * complete. */
	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
	                 SI_CONTEXT_CS_PARTIAL_FLUSH;

	/* Constants are read through both the scalar (SMEM) and vector
	 * (VMEM) L1 caches. */
	if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
		sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
				 SI_CONTEXT_INV_VMEM_L1;

	if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
		     PIPE_BARRIER_SHADER_BUFFER |
		     PIPE_BARRIER_TEXTURE |
		     PIPE_BARRIER_IMAGE |
		     PIPE_BARRIER_STREAMOUT_BUFFER |
		     PIPE_BARRIER_GLOBAL_BUFFER)) {
		/* As far as I can tell, L1 contents are written back to L2
		 * automatically at end of shader, but the contents of other
		 * L1 caches might still be stale. */
		sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
	}

	if (flags & PIPE_BARRIER_INDEX_BUFFER) {
		/* Indices are read through TC L2 since VI.
		 * L1 isn't used.
		 */
		if (sctx->screen->b.chip_class <= CIK)
			sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
	}

	if (flags & PIPE_BARRIER_FRAMEBUFFER)
		sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;

	/* CB/DB and the command processor read through L2, so shader
	 * stores must be written back to it first. */
	if (flags & (PIPE_BARRIER_FRAMEBUFFER |
		     PIPE_BARRIER_INDIRECT_BUFFER))
		sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
3557
3558static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
3559{
3560	struct pipe_blend_state blend;
3561
3562	memset(&blend, 0, sizeof(blend));
3563	blend.independent_blend_enable = true;
3564	blend.rt[0].colormask = 0xf;
3565	return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
3566}
3567
/* r600_common callback for reserving gfx CS space.
 * num_dw and include_draw_vbo are ignored; si_need_cs_space makes its
 * own reservation decision. */
static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
				 bool include_draw_vbo)
{
	struct si_context *sctx = (struct si_context *)ctx;

	si_need_cs_space(sctx);
}
3573
3574static void si_init_config(struct si_context *sctx);
3575
/* Register all state atoms, install the state-related pipe_context
 * callbacks, create the internal DSA/blend states, and build the
 * init config PM4 state.  Called once at context creation. */
void si_init_state_functions(struct si_context *sctx)
{
	/* Atoms owned by the shared ../radeon code. */
	si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond);
	si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin);
	si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable);
	si_init_external_atom(sctx, &sctx->b.scissors.atom, &sctx->atoms.s.scissors);
	si_init_external_atom(sctx, &sctx->b.viewports.atom, &sctx->atoms.s.viewports);

	/* Atoms owned by radeonsi, each paired with its emit function. */
	si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
	si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
	si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
	si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
	si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state);
	si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
	si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
	si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);

	/* CSO state callbacks. */
	sctx->b.b.create_blend_state = si_create_blend_state;
	sctx->b.b.bind_blend_state = si_bind_blend_state;
	sctx->b.b.delete_blend_state = si_delete_blend_state;
	sctx->b.b.set_blend_color = si_set_blend_color;

	sctx->b.b.create_rasterizer_state = si_create_rs_state;
	sctx->b.b.bind_rasterizer_state = si_bind_rs_state;
	sctx->b.b.delete_rasterizer_state = si_delete_rs_state;

	sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state;
	sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
	sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state;

	/* Internal states used by decompression, resolve and fast clears. */
	sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
	sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
	sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
	sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
	sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS);

	sctx->b.b.set_clip_state = si_set_clip_state;
	sctx->b.b.set_stencil_ref = si_set_stencil_ref;

	sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
	sctx->b.b.get_sample_position = cayman_get_sample_position;

	sctx->b.b.create_sampler_state = si_create_sampler_state;
	sctx->b.b.delete_sampler_state = si_delete_sampler_state;

	sctx->b.b.create_sampler_view = si_create_sampler_view;
	sctx->b.b.sampler_view_destroy = si_sampler_view_destroy;

	sctx->b.b.set_sample_mask = si_set_sample_mask;

	sctx->b.b.create_vertex_elements_state = si_create_vertex_elements;
	sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements;
	sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element;
	sctx->b.b.set_vertex_buffers = si_set_vertex_buffers;
	sctx->b.b.set_index_buffer = si_set_index_buffer;

	sctx->b.b.texture_barrier = si_texture_barrier;
	sctx->b.b.memory_barrier = si_memory_barrier;
	sctx->b.b.set_min_samples = si_set_min_samples;
	sctx->b.b.set_tess_state = si_set_tess_state;

	sctx->b.b.set_active_query_state = si_set_active_query_state;
	sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
	sctx->b.save_qbo_state = si_save_qbo_state;
	sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;

	sctx->b.b.draw_vbo = si_draw_vbo;

	/* Build the immutable register state last. */
	si_init_config(sctx);
}
3648
3649static uint32_t si_get_bo_metadata_word1(struct r600_common_screen *rscreen)
3650{
3651	return (ATI_VENDOR_ID << 16) | rscreen->info.pci_id;
3652}
3653
/* Fill md->metadata with a radeonsi-specific description of the texture
 * (including its image descriptor and DCC offset) so another process
 * importing the BO can reconstruct the texture state.  The inverse is
 * si_apply_opaque_metadata(). */
static void si_query_opaque_metadata(struct r600_common_screen *rscreen,
				     struct r600_texture *rtex,
			             struct radeon_bo_metadata *md)
{
	struct si_screen *sscreen = (struct si_screen*)rscreen;
	struct pipe_resource *res = &rtex->resource.b.b;
	/* Identity swizzle used for the whole-resource descriptor. */
	static const unsigned char swizzle[] = {
		PIPE_SWIZZLE_X,
		PIPE_SWIZZLE_Y,
		PIPE_SWIZZLE_Z,
		PIPE_SWIZZLE_W
	};
	uint32_t desc[8], i;
	bool is_array = util_resource_is_array_texture(res);

	/* DRM 2.x.x doesn't support this. */
	if (rscreen->info.drm_major != 3)
		return;

	/* Shared textures must not use a separate DCC buffer or FMASK. */
	assert(rtex->dcc_separate_buffer == NULL);
	assert(rtex->fmask.size == 0);

	/* Metadata image format format version 1:
	 * [0] = 1 (metadata format identifier)
	 * [1] = (VENDOR_ID << 16) | PCI_ID
	 * [2:9] = image descriptor for the whole resource
	 *         [2] is always 0, because the base address is cleared
	 *         [9] is the DCC offset bits [39:8] from the beginning of
	 *             the buffer
	 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
	 */

	md->metadata[0] = 1; /* metadata image format version 1 */

	/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
	md->metadata[1] = si_get_bo_metadata_word1(rscreen);

	/* Build a descriptor that covers all mip levels and all layers. */
	si_make_texture_descriptor(sscreen, rtex, true,
				   res->target, res->format,
				   swizzle, 0, res->last_level, 0,
				   is_array ? res->array_size - 1 : 0,
				   res->width0, res->height0, res->depth0,
				   desc, NULL);

	si_set_mutable_tex_desc_fields(rtex, &rtex->surface.level[0], 0, 0,
				       rtex->surface.blk_w, false, desc);

	/* Clear the base address and set the relative DCC offset. */
	desc[0] = 0;
	desc[1] &= C_008F14_BASE_ADDRESS_HI;
	desc[7] = rtex->dcc_offset >> 8;

	/* Dwords [2:9] contain the image descriptor. */
	memcpy(&md->metadata[2], desc, sizeof(desc));

	/* Dwords [10:..] contain the mipmap level offsets. */
	for (i = 0; i <= res->last_level; i++)
		md->metadata[10+i] = rtex->surface.level[i].offset >> 8;

	/* Total size: 10 header/descriptor dwords + (last_level + 1)
	 * mip-offset dwords, times 4. */
	md->size_metadata = (11 + res->last_level) * 4;
}
3715
3716static void si_apply_opaque_metadata(struct r600_common_screen *rscreen,
3717				     struct r600_texture *rtex,
3718			             struct radeon_bo_metadata *md)
3719{
3720	uint32_t *desc = &md->metadata[2];
3721
3722	if (rscreen->chip_class < VI)
3723		return;
3724
3725	/* Return if DCC is enabled. The texture should be set up with it
3726	 * already.
3727	 */
3728	if (md->size_metadata >= 11 * 4 &&
3729	    md->metadata[0] != 0 &&
3730	    md->metadata[1] == si_get_bo_metadata_word1(rscreen) &&
3731	    G_008F28_COMPRESSION_EN(desc[6])) {
3732		assert(rtex->dcc_offset == ((uint64_t)desc[7] << 8));
3733		return;
3734	}
3735
3736	/* Disable DCC. These are always set by texture_from_handle and must
3737	 * be cleared here.
3738	 */
3739	rtex->dcc_offset = 0;
3740}
3741
3742void si_init_screen_state_functions(struct si_screen *sscreen)
3743{
3744	sscreen->b.b.is_format_supported = si_is_format_supported;
3745	sscreen->b.query_opaque_metadata = si_query_opaque_metadata;
3746	sscreen->b.apply_opaque_metadata = si_apply_opaque_metadata;
3747}
3748
/* Program PA_SC_RASTER_CONFIG per shader engine (and RASTER_CONFIG_1 on
 * CIK+) when some render backends are harvested (disabled), remapping
 * the SE/PKR/RB routing fields away from disabled RBs.  GRBM_GFX_INDEX
 * is used to target each SE individually and is restored to broadcast
 * mode afterwards. */
static void
si_write_harvested_raster_configs(struct si_context *sctx,
				  struct si_pm4_state *pm4,
				  unsigned raster_config,
				  unsigned raster_config_1)
{
	unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
	unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
	unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
	unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
	unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Build per-SE masks of (theoretically present) RBs, then restrict
	 * them to the RBs that are actually enabled. */
	se_mask[0] = ((1 << rb_per_se) - 1);
	se_mask[1] = (se_mask[0] << rb_per_se);
	se_mask[2] = (se_mask[1] << rb_per_se);
	se_mask[3] = (se_mask[2] << rb_per_se);

	se_mask[0] &= rb_mask;
	se_mask[1] &= rb_mask;
	se_mask[2] &= rb_mask;
	se_mask[3] &= rb_mask;

	assert(num_se == 1 || num_se == 2 || num_se == 4);
	assert(sh_per_se == 1 || sh_per_se == 2);
	assert(rb_per_pkr == 1 || rb_per_pkr == 2);

	/* XXX: I can't figure out what the *_XSEL and *_YSEL
	 * fields are for, so I'm leaving them as their default
	 * values. */

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* If one SE of a pair has no enabled RBs, route its work to
		 * the other SE of the pair via SE_MAP. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= C_028350_SE_MAP;

			if (!se_mask[idx]) {
				raster_config_se |=
					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |=
					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same idea at packer granularity: if one packer of the SE
		 * has no enabled RBs, remap via PKR_MAP. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= C_028350_PKR_MAP;

			if (!pkr0_mask) {
				raster_config_se |=
					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |=
					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Finally, remap individual RBs within each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= C_028350_RB_MAP_PKR0;

				if (!rb0_mask) {
					raster_config_se |=
						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= C_028350_RB_MAP_PKR1;

					if (!rb0_mask) {
						raster_config_se |=
							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on SI and CI+ */
		if (sctx->b.chip_class < CIK)
			si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
				       SE_INDEX(se) | SH_BROADCAST_WRITES |
				       INSTANCE_BROADCAST_WRITES);
		else
			si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
				       S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
				       S_030800_INSTANCE_BROADCAST_WRITES(1));
		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
	}

	/* GRBM_GFX_INDEX has a different offset on SI and CI+ */
	if (sctx->b.chip_class < CIK)
		si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
			       SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
			       INSTANCE_BROADCAST_WRITES);
	else {
		si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
			       S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
			       S_030800_INSTANCE_BROADCAST_WRITES(1));

		/* CIK+ only: if one SE pair has no enabled RBs at all,
		 * remap it via SE_PAIR_MAP in RASTER_CONFIG_1. */
		if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
		                     (!se_mask[2] && !se_mask[3]))) {
			raster_config_1 &= C_028354_SE_PAIR_MAP;

			if (!se_mask[0] && !se_mask[1]) {
				raster_config_1 |=
					S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
			} else {
				raster_config_1 |=
					S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
			}
		}

		si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
	}
}
3889
/* Build sctx->init_config: a PM4 state holding registers that radeonsi
 * programs once per context and never changes afterwards, uploaded as
 * an indirect buffer.  Includes per-family raster_config values and the
 * harvested variant when some render backends are disabled. */
static void si_init_config(struct si_context *sctx)
{
	struct si_screen *sscreen = sctx->screen;
	unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
	unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
	unsigned raster_config, raster_config_1;
	uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);

	/* On allocation failure, sctx->init_config is simply left unset. */
	if (!pm4)
		return;

	/* CONTEXT_CONTROL with the load and shadow enable bits set. */
	si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1));
	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
	si_pm4_cmd_end(pm4, false);

	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
	si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));

	/* FIXME calculate these values somehow ??? */
	si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
	si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
	si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);

	si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
	si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);

	si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
	si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
	if (sctx->b.chip_class < CIK)
		si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
			       S_008A14_CLIP_VTX_REORDER_ENA(1));

	si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
	si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);

	si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);

	/* Per-family golden raster_config values. */
	switch (sctx->screen->b.family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		raster_config = 0x2a00126a;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_VERDE:
		raster_config = 0x0000124a;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_OLAND:
		raster_config = 0x00000082;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_HAINAN:
		raster_config = 0x00000000;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_BONAIRE:
		raster_config = 0x16000012;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_HAWAII:
		raster_config = 0x3a00161a;
		raster_config_1 = 0x0000002e;
		break;
	case CHIP_FIJI:
		if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) {
			/* old kernels with old tiling config */
			raster_config = 0x16000012;
			raster_config_1 = 0x0000002a;
		} else {
			raster_config = 0x3a00161a;
			raster_config_1 = 0x0000002e;
		}
		break;
	case CHIP_POLARIS10:
		raster_config = 0x16000012;
		raster_config_1 = 0x0000002a;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		raster_config = 0x16000012;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_TONGA:
		raster_config = 0x16000012;
		raster_config_1 = 0x0000002a;
		break;
	case CHIP_ICELAND:
		if (num_rb == 1)
			raster_config = 0x00000000;
		else
			raster_config = 0x00000002;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_CARRIZO:
		raster_config = 0x00000002;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_KAVERI:
		/* KV should be 0x00000002, but that causes problems with radeon */
		raster_config = 0x00000000; /* 0x00000002 */
		raster_config_1 = 0x00000000;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	case CHIP_STONEY:
		raster_config = 0x00000000;
		raster_config_1 = 0x00000000;
		break;
	default:
		fprintf(stderr,
			"radeonsi: Unknown GPU, using 0 for raster_config\n");
		raster_config = 0x00000000;
		raster_config_1 = 0x00000000;
		break;
	}

	/* Always use the default config when all backends are enabled
	 * (or when we failed to determine the enabled backends).
	 */
	if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
			       raster_config);
		if (sctx->b.chip_class >= CIK)
			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
				       raster_config_1);
	} else {
		si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
	}

	/* Scissor setup: window offset disabled, generic/screen scissors
	 * opened to the 16384x16384 maximum. */
	si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
	si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
	si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
		       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
	si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
	si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
		       S_028034_BR_X(16384) | S_028034_BR_Y(16384));

	si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
		       S_028230_ER_TRI(0xA) |
		       S_028230_ER_POINT(0xA) |
		       S_028230_ER_RECT(0xA) |
		       /* Required by DX10_DIAMOND_TEST_ENA: */
		       S_028230_ER_LINE_LR(0x1A) |
		       S_028230_ER_LINE_RL(0x26) |
		       S_028230_ER_LINE_TB(0xA) |
		       S_028230_ER_LINE_BT(0xA));
	/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
	si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
	si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
	si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
	si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
	si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
	si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);

	si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
	si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
	si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);

	if (sctx->b.chip_class >= CIK) {
		/* If this is 0, Bonaire can hang even if GS isn't being used.
		 * Other chips are unaffected. These are suboptimal values,
		 * but we don't use on-chip GS.
		 */
		si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
			       S_028A44_ES_VERTS_PER_SUBGRP(64) |
			       S_028A44_GS_PRIMS_PER_SUBGRP(4));

		/* Allow all CUs for LS/ES/GS; HS gets the register default. */
		si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
		si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
		si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
		si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));

		if (sscreen->b.info.num_good_compute_units /
		    (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) {
			/* Too few available compute units per SH. Disallowing
			 * VS to run on CU0 could hurt us more than late VS
			 * allocation would help.
			 *
			 * LATE_ALLOC_VS = 2 is the highest safe number.
			 */
			si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
			si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
		} else {
			/* Set LATE_ALLOC_VS == 31. It should be less than
			 * the number of scratch waves. Limitations:
			 * - VS can't execute on CU0.
			 * - If HS writes outputs to LDS, LS can't execute on CU0.
			 */
			si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
			si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
		}

		si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
	}

	if (sctx->b.chip_class >= VI) {
		unsigned vgt_tess_distribution;

		si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
			       S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
			       S_028424_OVERWRITE_COMBINER_WATERMARK(4));
		if (sctx->b.family < CHIP_POLARIS10)
			si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);

		vgt_tess_distribution =
			S_028B50_ACCUM_ISOLINE(32) |
			S_028B50_ACCUM_TRI(11) |
			S_028B50_ACCUM_QUAD(11) |
			S_028B50_DONUT_SPLIT(16);

		/* Testing with Unigine Heaven extreme tesselation yielded best results
		 * with TRAP_SPLIT = 3.
		 */
		if (sctx->b.family == CHIP_FIJI ||
		    sctx->b.family >= CHIP_POLARIS10)
			vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);

		si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
	} else {
		si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
	}

	if (sctx->b.family == CHIP_STONEY)
		si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);

	/* Border color buffer address (bits [39:8]; high bits on CIK+)
	 * and the corresponding BO reference. */
	si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
	if (sctx->b.chip_class >= CIK)
		si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
	si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
		      RADEON_PRIO_BORDER_COLORS);

	si_pm4_upload_indirect_buffer(sctx, pm4);
	sctx->init_config = pm4;
}
4128}
4129