si_state.c revision 1f4bb3826464e2ce1d3f47183c96e6e7fde9a1d7
1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *      Christian König <christian.koenig@amd.com>
25 */
26
27#include "si_pipe.h"
28#include "si_shader.h"
29#include "sid.h"
30#include "radeon/r600_cs.h"
31
32#include "util/u_format.h"
33#include "util/u_format_s3tc.h"
34#include "util/u_memory.h"
35#include "util/u_pstipple.h"
36
37static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem,
38			 void (*emit)(struct si_context *ctx, struct r600_atom *state),
39			 unsigned num_dw)
40{
41	atom->emit = (void*)emit;
42	atom->num_dw = num_dw;
43	atom->dirty = false;
44	*list_elem = atom;
45}
46
47uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
48{
49	if (sscreen->b.chip_class == CIK &&
50	    sscreen->b.info.cik_macrotile_mode_array_valid) {
51		unsigned index, tileb;
52
53		tileb = 8 * 8 * tex->surface.bpe;
54		tileb = MIN2(tex->surface.tile_split, tileb);
55
56		for (index = 0; tileb > 64; index++) {
57			tileb >>= 1;
58		}
59		assert(index < 16);
60
61		return (sscreen->b.info.cik_macrotile_mode_array[index] >> 6) & 0x3;
62	}
63
64	if (sscreen->b.chip_class == SI &&
65	    sscreen->b.info.si_tile_mode_array_valid) {
66		/* Don't use stencil_tiling_index, because num_banks is always
67		 * read from the depth mode. */
68		unsigned tile_mode_index = tex->surface.tiling_index[0];
69		assert(tile_mode_index < 32);
70
71		return G_009910_NUM_BANKS(sscreen->b.info.si_tile_mode_array[tile_mode_index]);
72	}
73
74	/* The old way. */
75	switch (sscreen->b.tiling_info.num_banks) {
76	case 2:
77		return V_02803C_ADDR_SURF_2_BANK;
78	case 4:
79		return V_02803C_ADDR_SURF_4_BANK;
80	case 8:
81	default:
82		return V_02803C_ADDR_SURF_8_BANK;
83	case 16:
84		return V_02803C_ADDR_SURF_16_BANK;
85	}
86}
87
88unsigned cik_tile_split(unsigned tile_split)
89{
90	switch (tile_split) {
91	case 64:
92		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_64B;
93		break;
94	case 128:
95		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_128B;
96		break;
97	case 256:
98		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_256B;
99		break;
100	case 512:
101		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_512B;
102		break;
103	default:
104	case 1024:
105		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_1KB;
106		break;
107	case 2048:
108		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_2KB;
109		break;
110	case 4096:
111		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_4KB;
112		break;
113	}
114	return tile_split;
115}
116
117unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect)
118{
119	switch (macro_tile_aspect) {
120	default:
121	case 1:
122		macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_1;
123		break;
124	case 2:
125		macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_2;
126		break;
127	case 4:
128		macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_4;
129		break;
130	case 8:
131		macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_8;
132		break;
133	}
134	return macro_tile_aspect;
135}
136
137unsigned cik_bank_wh(unsigned bankwh)
138{
139	switch (bankwh) {
140	default:
141	case 1:
142		bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_1;
143		break;
144	case 2:
145		bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_2;
146		break;
147	case 4:
148		bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_4;
149		break;
150	case 8:
151		bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_8;
152		break;
153	}
154	return bankwh;
155}
156
157unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode)
158{
159	if (sscreen->b.info.si_tile_mode_array_valid) {
160		uint32_t gb_tile_mode = sscreen->b.info.si_tile_mode_array[tile_mode];
161
162		return G_009910_PIPE_CONFIG(gb_tile_mode);
163	}
164
165	/* This is probably broken for a lot of chips, but it's only used
166	 * if the kernel cannot return the tile mode array for CIK. */
167	switch (sscreen->b.info.r600_num_tile_pipes) {
168	case 16:
169		return V_02803C_X_ADDR_SURF_P16_32X32_16X16;
170	case 8:
171		return V_02803C_X_ADDR_SURF_P8_32X32_16X16;
172	case 4:
173	default:
174		if (sscreen->b.info.r600_num_backends == 4)
175			return V_02803C_X_ADDR_SURF_P4_16X16;
176		else
177			return V_02803C_X_ADDR_SURF_P4_8X16;
178	case 2:
179		return V_02803C_ADDR_SURF_P2;
180	}
181}
182
183static unsigned si_map_swizzle(unsigned swizzle)
184{
185	switch (swizzle) {
186	case UTIL_FORMAT_SWIZZLE_Y:
187		return V_008F0C_SQ_SEL_Y;
188	case UTIL_FORMAT_SWIZZLE_Z:
189		return V_008F0C_SQ_SEL_Z;
190	case UTIL_FORMAT_SWIZZLE_W:
191		return V_008F0C_SQ_SEL_W;
192	case UTIL_FORMAT_SWIZZLE_0:
193		return V_008F0C_SQ_SEL_0;
194	case UTIL_FORMAT_SWIZZLE_1:
195		return V_008F0C_SQ_SEL_1;
196	default: /* UTIL_FORMAT_SWIZZLE_X */
197		return V_008F0C_SQ_SEL_X;
198	}
199}
200
/*
 * Convert a float to fixed point with frac_bits fractional bits.
 *
 * value:     number to convert; may be negative
 * frac_bits: number of fractional bits (must be below 32)
 *
 * Returns the scaled value truncated toward zero.  Negative inputs yield
 * the low 32 bits of their two's complement representation.
 */
static uint32_t S_FIXED(float value, uint32_t frac_bits)
{
	/* Converting a negative float straight to an unsigned type is
	 * undefined behavior in C.  Convert to a wide signed integer first;
	 * the narrowing to uint32_t below is well defined (modulo 2^32). */
	int64_t scaled = (int64_t)(value * (float)(1u << frac_bits));

	return (uint32_t)scaled;
}
205
/*
 * Pack a float into unsigned 12.4 fixed point.
 *
 * Inputs at or below zero give 0, inputs at or above 4096 give 0xffff,
 * everything in between is scaled by 16 and truncated.
 */
static unsigned si_pack_float_12p4(float x)
{
	if (x <= 0) {
		return 0;
	}
	if (x >= 4096) {
		return 0xffff;
	}
	return x * 16;
}
212
213/*
214 * Inferred framebuffer and blender state.
215 *
216 * One of the reasons this must be derived from the framebuffer state is that:
217 * - The blend state mask is 0xf most of the time.
218 * - The COLOR1 format isn't INVALID because of possible dual-source blending,
219 *   so COLOR1 is enabled pretty much all the time.
220 * So CB_TARGET_MASK is the only register that can disable COLOR1.
221 */
222static void si_update_fb_blend_state(struct si_context *sctx)
223{
224	struct si_pm4_state *pm4;
225	struct si_state_blend *blend = sctx->queued.named.blend;
226	uint32_t mask = 0, i;
227
228	if (blend == NULL)
229		return;
230
231	pm4 = CALLOC_STRUCT(si_pm4_state);
232	if (pm4 == NULL)
233		return;
234
235	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++)
236		if (sctx->framebuffer.state.cbufs[i])
237			mask |= 0xf << (4*i);
238	mask &= blend->cb_target_mask;
239
240	si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
241	si_pm4_set_state(sctx, fb_blend, pm4);
242}
243
244/*
245 * Blender functions
246 */
247
248static uint32_t si_translate_blend_function(int blend_func)
249{
250	switch (blend_func) {
251	case PIPE_BLEND_ADD:
252		return V_028780_COMB_DST_PLUS_SRC;
253	case PIPE_BLEND_SUBTRACT:
254		return V_028780_COMB_SRC_MINUS_DST;
255	case PIPE_BLEND_REVERSE_SUBTRACT:
256		return V_028780_COMB_DST_MINUS_SRC;
257	case PIPE_BLEND_MIN:
258		return V_028780_COMB_MIN_DST_SRC;
259	case PIPE_BLEND_MAX:
260		return V_028780_COMB_MAX_DST_SRC;
261	default:
262		R600_ERR("Unknown blend function %d\n", blend_func);
263		assert(0);
264		break;
265	}
266	return 0;
267}
268
269static uint32_t si_translate_blend_factor(int blend_fact)
270{
271	switch (blend_fact) {
272	case PIPE_BLENDFACTOR_ONE:
273		return V_028780_BLEND_ONE;
274	case PIPE_BLENDFACTOR_SRC_COLOR:
275		return V_028780_BLEND_SRC_COLOR;
276	case PIPE_BLENDFACTOR_SRC_ALPHA:
277		return V_028780_BLEND_SRC_ALPHA;
278	case PIPE_BLENDFACTOR_DST_ALPHA:
279		return V_028780_BLEND_DST_ALPHA;
280	case PIPE_BLENDFACTOR_DST_COLOR:
281		return V_028780_BLEND_DST_COLOR;
282	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
283		return V_028780_BLEND_SRC_ALPHA_SATURATE;
284	case PIPE_BLENDFACTOR_CONST_COLOR:
285		return V_028780_BLEND_CONSTANT_COLOR;
286	case PIPE_BLENDFACTOR_CONST_ALPHA:
287		return V_028780_BLEND_CONSTANT_ALPHA;
288	case PIPE_BLENDFACTOR_ZERO:
289		return V_028780_BLEND_ZERO;
290	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
291		return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
292	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
293		return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
294	case PIPE_BLENDFACTOR_INV_DST_ALPHA:
295		return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
296	case PIPE_BLENDFACTOR_INV_DST_COLOR:
297		return V_028780_BLEND_ONE_MINUS_DST_COLOR;
298	case PIPE_BLENDFACTOR_INV_CONST_COLOR:
299		return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
300	case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
301		return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
302	case PIPE_BLENDFACTOR_SRC1_COLOR:
303		return V_028780_BLEND_SRC1_COLOR;
304	case PIPE_BLENDFACTOR_SRC1_ALPHA:
305		return V_028780_BLEND_SRC1_ALPHA;
306	case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
307		return V_028780_BLEND_INV_SRC1_COLOR;
308	case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
309		return V_028780_BLEND_INV_SRC1_ALPHA;
310	default:
311		R600_ERR("Bad blend factor %d not supported!\n", blend_fact);
312		assert(0);
313		break;
314	}
315	return 0;
316}
317
318static void *si_create_blend_state_mode(struct pipe_context *ctx,
319					const struct pipe_blend_state *state,
320					unsigned mode)
321{
322	struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
323	struct si_pm4_state *pm4 = &blend->pm4;
324
325	uint32_t color_control = 0;
326
327	if (blend == NULL)
328		return NULL;
329
330	blend->alpha_to_one = state->alpha_to_one;
331
332	if (state->logicop_enable) {
333		color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
334	} else {
335		color_control |= S_028808_ROP3(0xcc);
336	}
337
338	si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
339		       S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
340		       S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
341		       S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
342		       S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
343		       S_028B70_ALPHA_TO_MASK_OFFSET3(2));
344
345	blend->cb_target_mask = 0;
346	for (int i = 0; i < 8; i++) {
347		/* state->rt entries > 0 only written if independent blending */
348		const int j = state->independent_blend_enable ? i : 0;
349
350		unsigned eqRGB = state->rt[j].rgb_func;
351		unsigned srcRGB = state->rt[j].rgb_src_factor;
352		unsigned dstRGB = state->rt[j].rgb_dst_factor;
353		unsigned eqA = state->rt[j].alpha_func;
354		unsigned srcA = state->rt[j].alpha_src_factor;
355		unsigned dstA = state->rt[j].alpha_dst_factor;
356
357		unsigned blend_cntl = 0;
358
359		/* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */
360		blend->cb_target_mask |= state->rt[j].colormask << (4 * i);
361
362		if (!state->rt[j].blend_enable) {
363			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
364			continue;
365		}
366
367		blend_cntl |= S_028780_ENABLE(1);
368		blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
369		blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
370		blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
371
372		if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
373			blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
374			blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
375			blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
376			blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
377		}
378		si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
379	}
380
381	if (blend->cb_target_mask) {
382		color_control |= S_028808_MODE(mode);
383	} else {
384		color_control |= S_028808_MODE(V_028808_CB_DISABLE);
385	}
386	si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
387
388	return blend;
389}
390
391static void *si_create_blend_state(struct pipe_context *ctx,
392				   const struct pipe_blend_state *state)
393{
394	return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
395}
396
397static void si_bind_blend_state(struct pipe_context *ctx, void *state)
398{
399	struct si_context *sctx = (struct si_context *)ctx;
400	si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
401	si_update_fb_blend_state(sctx);
402}
403
404static void si_delete_blend_state(struct pipe_context *ctx, void *state)
405{
406	struct si_context *sctx = (struct si_context *)ctx;
407	si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
408}
409
410static void si_set_blend_color(struct pipe_context *ctx,
411			       const struct pipe_blend_color *state)
412{
413	struct si_context *sctx = (struct si_context *)ctx;
414	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
415
416        if (pm4 == NULL)
417                return;
418
419	si_pm4_set_reg(pm4, R_028414_CB_BLEND_RED, fui(state->color[0]));
420	si_pm4_set_reg(pm4, R_028418_CB_BLEND_GREEN, fui(state->color[1]));
421	si_pm4_set_reg(pm4, R_02841C_CB_BLEND_BLUE, fui(state->color[2]));
422	si_pm4_set_reg(pm4, R_028420_CB_BLEND_ALPHA, fui(state->color[3]));
423
424	si_pm4_set_state(sctx, blend_color, pm4);
425}
426
427/*
428 * Clipping, scissors and viewport
429 */
430
431static void si_set_clip_state(struct pipe_context *ctx,
432			      const struct pipe_clip_state *state)
433{
434	struct si_context *sctx = (struct si_context *)ctx;
435	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
436	struct pipe_constant_buffer cb;
437
438	if (pm4 == NULL)
439		return;
440
441	for (int i = 0; i < 6; i++) {
442		si_pm4_set_reg(pm4, R_0285BC_PA_CL_UCP_0_X + i * 16,
443			       fui(state->ucp[i][0]));
444		si_pm4_set_reg(pm4, R_0285C0_PA_CL_UCP_0_Y + i * 16,
445			       fui(state->ucp[i][1]));
446		si_pm4_set_reg(pm4, R_0285C4_PA_CL_UCP_0_Z + i * 16,
447			       fui(state->ucp[i][2]));
448		si_pm4_set_reg(pm4, R_0285C8_PA_CL_UCP_0_W + i * 16,
449			       fui(state->ucp[i][3]));
450        }
451
452	cb.buffer = NULL;
453	cb.user_buffer = state->ucp;
454	cb.buffer_offset = 0;
455	cb.buffer_size = 4*4*8;
456	ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, SI_DRIVER_STATE_CONST_BUF, &cb);
457	pipe_resource_reference(&cb.buffer, NULL);
458
459	si_pm4_set_state(sctx, clip, pm4);
460}
461
462#define SIX_BITS 0x3F
463
464static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
465{
466	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
467	struct tgsi_shader_info *info = si_get_vs_info(sctx);
468	unsigned window_space =
469	   info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
470	unsigned clipdist_mask =
471		info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask;
472
473	r600_write_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
474		S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
475		S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
476		S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
477		S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) |
478		S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) |
479		S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize ||
480					    info->writes_edgeflag ||
481					    info->writes_layer) |
482		(sctx->queued.named.rasterizer->clip_plane_enable &
483		 clipdist_mask));
484	r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
485		sctx->queued.named.rasterizer->pa_cl_clip_cntl |
486		(clipdist_mask ? 0 :
487		 sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) |
488		S_028810_CLIP_DISABLE(window_space));
489}
490
491static void si_set_scissor_states(struct pipe_context *ctx,
492                                  unsigned start_slot,
493                                  unsigned num_scissors,
494                                  const struct pipe_scissor_state *state)
495{
496	struct si_context *sctx = (struct si_context *)ctx;
497	struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor);
498	struct si_pm4_state *pm4 = &scissor->pm4;
499
500	if (scissor == NULL)
501		return;
502
503	scissor->scissor = *state;
504	si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL,
505		       S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) |
506		       S_028250_WINDOW_OFFSET_DISABLE(1));
507	si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR,
508		       S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy));
509
510	si_pm4_set_state(sctx, scissor, scissor);
511}
512
513static void si_set_viewport_states(struct pipe_context *ctx,
514                                   unsigned start_slot,
515                                   unsigned num_viewports,
516                                   const struct pipe_viewport_state *state)
517{
518	struct si_context *sctx = (struct si_context *)ctx;
519	struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport);
520	struct si_pm4_state *pm4 = &viewport->pm4;
521
522	if (viewport == NULL)
523		return;
524
525	viewport->viewport = *state;
526	si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]));
527	si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]));
528	si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]));
529	si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]));
530	si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]));
531	si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]));
532
533	si_pm4_set_state(sctx, viewport, viewport);
534}
535
536/*
537 * inferred state between framebuffer and rasterizer
538 */
539static void si_update_fb_rs_state(struct si_context *sctx)
540{
541	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
542	struct si_pm4_state *pm4;
543	float offset_units;
544
545	if (!rs || !sctx->framebuffer.state.zsbuf)
546		return;
547
548	offset_units = sctx->queued.named.rasterizer->offset_units;
549	switch (sctx->framebuffer.state.zsbuf->texture->format) {
550	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
551	case PIPE_FORMAT_X8Z24_UNORM:
552	case PIPE_FORMAT_Z24X8_UNORM:
553	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
554		offset_units *= 2.0f;
555		break;
556	case PIPE_FORMAT_Z32_FLOAT:
557	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
558		offset_units *= 1.0f;
559		break;
560	case PIPE_FORMAT_Z16_UNORM:
561		offset_units *= 4.0f;
562		break;
563	default:
564		return;
565	}
566
567	pm4 = CALLOC_STRUCT(si_pm4_state);
568
569	if (pm4 == NULL)
570		return;
571
572	/* FIXME some of those reg can be computed with cso */
573	si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
574		       fui(sctx->queued.named.rasterizer->offset_scale));
575	si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units));
576	si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
577		       fui(sctx->queued.named.rasterizer->offset_scale));
578	si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units));
579
580	si_pm4_set_state(sctx, fb_rs, pm4);
581}
582
583/*
584 * Rasterizer
585 */
586
587static uint32_t si_translate_fill(uint32_t func)
588{
589	switch(func) {
590	case PIPE_POLYGON_MODE_FILL:
591		return V_028814_X_DRAW_TRIANGLES;
592	case PIPE_POLYGON_MODE_LINE:
593		return V_028814_X_DRAW_LINES;
594	case PIPE_POLYGON_MODE_POINT:
595		return V_028814_X_DRAW_POINTS;
596	default:
597		assert(0);
598		return V_028814_X_DRAW_POINTS;
599	}
600}
601
602static void *si_create_rs_state(struct pipe_context *ctx,
603				const struct pipe_rasterizer_state *state)
604{
605	struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
606	struct si_pm4_state *pm4 = &rs->pm4;
607	unsigned tmp;
608	float psize_min, psize_max;
609
610	if (rs == NULL) {
611		return NULL;
612	}
613
614	rs->two_side = state->light_twoside;
615	rs->multisample_enable = state->multisample;
616	rs->clip_plane_enable = state->clip_plane_enable;
617	rs->line_stipple_enable = state->line_stipple_enable;
618	rs->poly_stipple_enable = state->poly_stipple_enable;
619	rs->line_smooth = state->line_smooth;
620	rs->poly_smooth = state->poly_smooth;
621
622	rs->flatshade = state->flatshade;
623	rs->sprite_coord_enable = state->sprite_coord_enable;
624	rs->pa_sc_line_stipple = state->line_stipple_enable ?
625				S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
626				S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
627	rs->pa_cl_clip_cntl =
628		S_028810_PS_UCP_MODE(3) |
629		S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
630		S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
631		S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
632		S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
633		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
634
635	/* offset */
636	rs->offset_units = state->offset_units;
637	rs->offset_scale = state->offset_scale * 12.0f;
638
639	tmp = S_0286D4_FLAT_SHADE_ENA(1);
640	if (state->sprite_coord_enable) {
641		tmp |= S_0286D4_PNT_SPRITE_ENA(1) |
642			S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
643			S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
644			S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
645			S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1);
646		if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
647			tmp |= S_0286D4_PNT_SPRITE_TOP_1(1);
648		}
649	}
650	si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, tmp);
651
652	/* point size 12.4 fixed point */
653	tmp = (unsigned)(state->point_size * 8.0);
654	si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
655
656	if (state->point_size_per_vertex) {
657		psize_min = util_get_min_point_size(state);
658		psize_max = 8192;
659	} else {
660		/* Force the point size to be as if the vertex output was disabled. */
661		psize_min = state->point_size;
662		psize_max = state->point_size;
663	}
664	/* Divide by two, because 0.5 = 1 pixel. */
665	si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
666			S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) |
667			S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2)));
668
669	tmp = (unsigned)state->line_width * 8;
670	si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp));
671	si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
672		       S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
673		       S_028A48_MSAA_ENABLE(state->multisample ||
674					    state->poly_smooth ||
675					    state->line_smooth) |
676		       S_028A48_VPORT_SCISSOR_ENABLE(state->scissor));
677
678	si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
679		       S_028BE4_PIX_CENTER(state->half_pixel_center) |
680		       S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
681
682	si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
683	si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL,
684		S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
685		S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
686		S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
687		S_028814_FACE(!state->front_ccw) |
688		S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
689		S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
690		S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
691		S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
692				   state->fill_back != PIPE_POLYGON_MODE_FILL) |
693		S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
694		S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));
695	return rs;
696}
697
698static void si_bind_rs_state(struct pipe_context *ctx, void *state)
699{
700	struct si_context *sctx = (struct si_context *)ctx;
701	struct si_state_rasterizer *old_rs =
702		(struct si_state_rasterizer*)sctx->queued.named.rasterizer;
703	struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
704
705	if (state == NULL)
706		return;
707
708	if (sctx->framebuffer.nr_samples > 1 &&
709	    (!old_rs || old_rs->multisample_enable != rs->multisample_enable))
710		sctx->db_render_state.dirty = true;
711
712	si_pm4_bind_state(sctx, rasterizer, rs);
713	si_update_fb_rs_state(sctx);
714
715	sctx->clip_regs.dirty = true;
716}
717
718static void si_delete_rs_state(struct pipe_context *ctx, void *state)
719{
720	struct si_context *sctx = (struct si_context *)ctx;
721	si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
722}
723
724/*
725 * infeered state between dsa and stencil ref
726 */
727static void si_update_dsa_stencil_ref(struct si_context *sctx)
728{
729	struct si_pm4_state *pm4;
730	struct pipe_stencil_ref *ref = &sctx->stencil_ref;
731	struct si_state_dsa *dsa = sctx->queued.named.dsa;
732
733	if (!dsa)
734		return;
735
736	pm4 = CALLOC_STRUCT(si_pm4_state);
737	if (pm4 == NULL)
738		return;
739
740	si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK,
741		       S_028430_STENCILTESTVAL(ref->ref_value[0]) |
742		       S_028430_STENCILMASK(dsa->valuemask[0]) |
743		       S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
744		       S_028430_STENCILOPVAL(1));
745	si_pm4_set_reg(pm4, R_028434_DB_STENCILREFMASK_BF,
746		       S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
747		       S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
748		       S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
749		       S_028434_STENCILOPVAL_BF(1));
750
751	si_pm4_set_state(sctx, dsa_stencil_ref, pm4);
752}
753
754static void si_set_pipe_stencil_ref(struct pipe_context *ctx,
755				    const struct pipe_stencil_ref *state)
756{
757        struct si_context *sctx = (struct si_context *)ctx;
758        sctx->stencil_ref = *state;
759	si_update_dsa_stencil_ref(sctx);
760}
761
762
763/*
764 * DSA
765 */
766
767static uint32_t si_translate_stencil_op(int s_op)
768{
769	switch (s_op) {
770	case PIPE_STENCIL_OP_KEEP:
771		return V_02842C_STENCIL_KEEP;
772	case PIPE_STENCIL_OP_ZERO:
773		return V_02842C_STENCIL_ZERO;
774	case PIPE_STENCIL_OP_REPLACE:
775		return V_02842C_STENCIL_REPLACE_TEST;
776	case PIPE_STENCIL_OP_INCR:
777		return V_02842C_STENCIL_ADD_CLAMP;
778	case PIPE_STENCIL_OP_DECR:
779		return V_02842C_STENCIL_SUB_CLAMP;
780	case PIPE_STENCIL_OP_INCR_WRAP:
781		return V_02842C_STENCIL_ADD_WRAP;
782	case PIPE_STENCIL_OP_DECR_WRAP:
783		return V_02842C_STENCIL_SUB_WRAP;
784	case PIPE_STENCIL_OP_INVERT:
785		return V_02842C_STENCIL_INVERT;
786	default:
787		R600_ERR("Unknown stencil op %d", s_op);
788		assert(0);
789		break;
790	}
791	return 0;
792}
793
794static void *si_create_dsa_state(struct pipe_context *ctx,
795				 const struct pipe_depth_stencil_alpha_state *state)
796{
797	struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
798	struct si_pm4_state *pm4 = &dsa->pm4;
799	unsigned db_depth_control;
800	uint32_t db_stencil_control = 0;
801
802	if (dsa == NULL) {
803		return NULL;
804	}
805
806	dsa->valuemask[0] = state->stencil[0].valuemask;
807	dsa->valuemask[1] = state->stencil[1].valuemask;
808	dsa->writemask[0] = state->stencil[0].writemask;
809	dsa->writemask[1] = state->stencil[1].writemask;
810
811	db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
812		S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
813		S_028800_ZFUNC(state->depth.func);
814
815	/* stencil */
816	if (state->stencil[0].enabled) {
817		db_depth_control |= S_028800_STENCIL_ENABLE(1);
818		db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
819		db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
820		db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
821		db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
822
823		if (state->stencil[1].enabled) {
824			db_depth_control |= S_028800_BACKFACE_ENABLE(1);
825			db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
826			db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
827			db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
828			db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
829		}
830	}
831
832	/* alpha */
833	if (state->alpha.enabled) {
834		dsa->alpha_func = state->alpha.func;
835
836		si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
837		               SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value));
838	} else {
839		dsa->alpha_func = PIPE_FUNC_ALWAYS;
840	}
841
842	/* misc */
843	si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
844	si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
845
846	return dsa;
847}
848
/*
 * Bind a DSA state object and refresh the stencil reference registers
 * that depend on it.  A NULL state is ignored.
 */
static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_state_dsa *dsa = state;

	if (!dsa)
		return;

	si_pm4_bind_state(sctx, dsa, dsa);
	si_update_dsa_stencil_ref(sctx);
}
860
861static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
862{
863	struct si_context *sctx = (struct si_context *)ctx;
864	si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
865}
866
867static void *si_create_db_flush_dsa(struct si_context *sctx)
868{
869	struct pipe_depth_stencil_alpha_state dsa = {};
870
871	return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
872}
873
874/* DB RENDER STATE */
875
876static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
877{
878	struct si_context *sctx = (struct si_context*)ctx;
879
880	sctx->db_render_state.dirty = true;
881}
882
883static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
884{
885	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
886	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
887	unsigned db_shader_control;
888
889	r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
890
891	/* DB_RENDER_CONTROL */
892	if (sctx->dbcb_depth_copy_enabled ||
893	    sctx->dbcb_stencil_copy_enabled) {
894		radeon_emit(cs,
895			    S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
896			    S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
897			    S_028000_COPY_CENTROID(1) |
898			    S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
899	} else if (sctx->db_inplace_flush_enabled) {
900		radeon_emit(cs,
901			    S_028000_DEPTH_COMPRESS_DISABLE(1) |
902			    S_028000_STENCIL_COMPRESS_DISABLE(1));
903	} else if (sctx->db_depth_clear) {
904		radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1));
905	} else {
906		radeon_emit(cs, 0);
907	}
908
909	/* DB_COUNT_CONTROL (occlusion queries) */
910	if (sctx->b.num_occlusion_queries > 0) {
911		if (sctx->b.chip_class >= CIK) {
912			radeon_emit(cs,
913				    S_028004_PERFECT_ZPASS_COUNTS(1) |
914				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
915				    S_028004_ZPASS_ENABLE(1) |
916				    S_028004_SLICE_EVEN_ENABLE(1) |
917				    S_028004_SLICE_ODD_ENABLE(1));
918		} else {
919			radeon_emit(cs,
920				    S_028004_PERFECT_ZPASS_COUNTS(1) |
921				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
922		}
923	} else {
924		/* Disable occlusion queries. */
925		if (sctx->b.chip_class >= CIK) {
926			radeon_emit(cs, 0);
927		} else {
928			radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
929		}
930	}
931
932	/* DB_RENDER_OVERRIDE2 */
933	if (sctx->db_depth_disable_expclear) {
934		r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
935			S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1));
936	} else {
937		r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
938	}
939
940	db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) |
941		            sctx->ps_db_shader_control;
942
943	/* Bug workaround for smoothing (overrasterization) on SI. */
944	if (sctx->b.chip_class == SI && sctx->smoothing_enabled)
945		db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
946	else
947		db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
948
949	/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
950	if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
951		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
952
953	r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
954			       db_shader_control);
955}
956
957/*
958 * format translation
959 */
960static uint32_t si_translate_colorformat(enum pipe_format format)
961{
962	const struct util_format_description *desc = util_format_description(format);
963
964#define HAS_SIZE(x,y,z,w) \
965	(desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
966         desc->channel[2].size == (z) && desc->channel[3].size == (w))
967
968	if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
969		return V_028C70_COLOR_10_11_11;
970
971	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
972		return V_028C70_COLOR_INVALID;
973
974	switch (desc->nr_channels) {
975	case 1:
976		switch (desc->channel[0].size) {
977		case 8:
978			return V_028C70_COLOR_8;
979		case 16:
980			return V_028C70_COLOR_16;
981		case 32:
982			return V_028C70_COLOR_32;
983		}
984		break;
985	case 2:
986		if (desc->channel[0].size == desc->channel[1].size) {
987			switch (desc->channel[0].size) {
988			case 8:
989				return V_028C70_COLOR_8_8;
990			case 16:
991				return V_028C70_COLOR_16_16;
992			case 32:
993				return V_028C70_COLOR_32_32;
994			}
995		} else if (HAS_SIZE(8,24,0,0)) {
996			return V_028C70_COLOR_24_8;
997		} else if (HAS_SIZE(24,8,0,0)) {
998			return V_028C70_COLOR_8_24;
999		}
1000		break;
1001	case 3:
1002		if (HAS_SIZE(5,6,5,0)) {
1003			return V_028C70_COLOR_5_6_5;
1004		} else if (HAS_SIZE(32,8,24,0)) {
1005			return V_028C70_COLOR_X24_8_32_FLOAT;
1006		}
1007		break;
1008	case 4:
1009		if (desc->channel[0].size == desc->channel[1].size &&
1010		    desc->channel[0].size == desc->channel[2].size &&
1011		    desc->channel[0].size == desc->channel[3].size) {
1012			switch (desc->channel[0].size) {
1013			case 4:
1014				return V_028C70_COLOR_4_4_4_4;
1015			case 8:
1016				return V_028C70_COLOR_8_8_8_8;
1017			case 16:
1018				return V_028C70_COLOR_16_16_16_16;
1019			case 32:
1020				return V_028C70_COLOR_32_32_32_32;
1021			}
1022		} else if (HAS_SIZE(5,5,5,1)) {
1023			return V_028C70_COLOR_1_5_5_5;
1024		} else if (HAS_SIZE(10,10,10,2)) {
1025			return V_028C70_COLOR_2_10_10_10;
1026		}
1027		break;
1028	}
1029	return V_028C70_COLOR_INVALID;
1030}
1031
1032static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
1033{
1034	if (SI_BIG_ENDIAN) {
1035		switch(colorformat) {
1036		/* 8-bit buffers. */
1037		case V_028C70_COLOR_8:
1038			return V_028C70_ENDIAN_NONE;
1039
1040		/* 16-bit buffers. */
1041		case V_028C70_COLOR_5_6_5:
1042		case V_028C70_COLOR_1_5_5_5:
1043		case V_028C70_COLOR_4_4_4_4:
1044		case V_028C70_COLOR_16:
1045		case V_028C70_COLOR_8_8:
1046			return V_028C70_ENDIAN_8IN16;
1047
1048		/* 32-bit buffers. */
1049		case V_028C70_COLOR_8_8_8_8:
1050		case V_028C70_COLOR_2_10_10_10:
1051		case V_028C70_COLOR_8_24:
1052		case V_028C70_COLOR_24_8:
1053		case V_028C70_COLOR_16_16:
1054			return V_028C70_ENDIAN_8IN32;
1055
1056		/* 64-bit buffers. */
1057		case V_028C70_COLOR_16_16_16_16:
1058			return V_028C70_ENDIAN_8IN16;
1059
1060		case V_028C70_COLOR_32_32:
1061			return V_028C70_ENDIAN_8IN32;
1062
1063		/* 128-bit buffers. */
1064		case V_028C70_COLOR_32_32_32_32:
1065			return V_028C70_ENDIAN_8IN32;
1066		default:
1067			return V_028C70_ENDIAN_NONE; /* Unsupported. */
1068		}
1069	} else {
1070		return V_028C70_ENDIAN_NONE;
1071	}
1072}
1073
1074/* Returns the size in bits of the widest component of a CB format */
1075static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
1076{
1077	switch(colorformat) {
1078	case V_028C70_COLOR_4_4_4_4:
1079		return 4;
1080
1081	case V_028C70_COLOR_1_5_5_5:
1082	case V_028C70_COLOR_5_5_5_1:
1083		return 5;
1084
1085	case V_028C70_COLOR_5_6_5:
1086		return 6;
1087
1088	case V_028C70_COLOR_8:
1089	case V_028C70_COLOR_8_8:
1090	case V_028C70_COLOR_8_8_8_8:
1091		return 8;
1092
1093	case V_028C70_COLOR_10_10_10_2:
1094	case V_028C70_COLOR_2_10_10_10:
1095		return 10;
1096
1097	case V_028C70_COLOR_10_11_11:
1098	case V_028C70_COLOR_11_11_10:
1099		return 11;
1100
1101	case V_028C70_COLOR_16:
1102	case V_028C70_COLOR_16_16:
1103	case V_028C70_COLOR_16_16_16_16:
1104		return 16;
1105
1106	case V_028C70_COLOR_8_24:
1107	case V_028C70_COLOR_24_8:
1108		return 24;
1109
1110	case V_028C70_COLOR_32:
1111	case V_028C70_COLOR_32_32:
1112	case V_028C70_COLOR_32_32_32_32:
1113	case V_028C70_COLOR_X24_8_32_FLOAT:
1114		return 32;
1115	}
1116
1117	assert(!"Unknown maximum component size");
1118	return 0;
1119}
1120
1121static uint32_t si_translate_dbformat(enum pipe_format format)
1122{
1123	switch (format) {
1124	case PIPE_FORMAT_Z16_UNORM:
1125		return V_028040_Z_16;
1126	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1127	case PIPE_FORMAT_X8Z24_UNORM:
1128	case PIPE_FORMAT_Z24X8_UNORM:
1129	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1130		return V_028040_Z_24; /* deprecated on SI */
1131	case PIPE_FORMAT_Z32_FLOAT:
1132	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1133		return V_028040_Z_32_FLOAT;
1134	default:
1135		return V_028040_Z_INVALID;
1136	}
1137}
1138
1139/*
1140 * Texture translation
1141 */
1142
1143static uint32_t si_translate_texformat(struct pipe_screen *screen,
1144				       enum pipe_format format,
1145				       const struct util_format_description *desc,
1146				       int first_non_void)
1147{
1148	struct si_screen *sscreen = (struct si_screen*)screen;
1149	bool enable_s3tc = sscreen->b.info.drm_minor >= 31;
1150	boolean uniform = TRUE;
1151	int i;
1152
1153	/* Colorspace (return non-RGB formats directly). */
1154	switch (desc->colorspace) {
1155	/* Depth stencil formats */
1156	case UTIL_FORMAT_COLORSPACE_ZS:
1157		switch (format) {
1158		case PIPE_FORMAT_Z16_UNORM:
1159			return V_008F14_IMG_DATA_FORMAT_16;
1160		case PIPE_FORMAT_X24S8_UINT:
1161		case PIPE_FORMAT_Z24X8_UNORM:
1162		case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1163			return V_008F14_IMG_DATA_FORMAT_8_24;
1164		case PIPE_FORMAT_X8Z24_UNORM:
1165		case PIPE_FORMAT_S8X24_UINT:
1166		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1167			return V_008F14_IMG_DATA_FORMAT_24_8;
1168		case PIPE_FORMAT_S8_UINT:
1169			return V_008F14_IMG_DATA_FORMAT_8;
1170		case PIPE_FORMAT_Z32_FLOAT:
1171			return V_008F14_IMG_DATA_FORMAT_32;
1172		case PIPE_FORMAT_X32_S8X24_UINT:
1173		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1174			return V_008F14_IMG_DATA_FORMAT_X24_8_32;
1175		default:
1176			goto out_unknown;
1177		}
1178
1179	case UTIL_FORMAT_COLORSPACE_YUV:
1180		goto out_unknown; /* TODO */
1181
1182	case UTIL_FORMAT_COLORSPACE_SRGB:
1183		if (desc->nr_channels != 4 && desc->nr_channels != 1)
1184			goto out_unknown;
1185		break;
1186
1187	default:
1188		break;
1189	}
1190
1191	if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
1192		if (!enable_s3tc)
1193			goto out_unknown;
1194
1195		switch (format) {
1196		case PIPE_FORMAT_RGTC1_SNORM:
1197		case PIPE_FORMAT_LATC1_SNORM:
1198		case PIPE_FORMAT_RGTC1_UNORM:
1199		case PIPE_FORMAT_LATC1_UNORM:
1200			return V_008F14_IMG_DATA_FORMAT_BC4;
1201		case PIPE_FORMAT_RGTC2_SNORM:
1202		case PIPE_FORMAT_LATC2_SNORM:
1203		case PIPE_FORMAT_RGTC2_UNORM:
1204		case PIPE_FORMAT_LATC2_UNORM:
1205			return V_008F14_IMG_DATA_FORMAT_BC5;
1206		default:
1207			goto out_unknown;
1208		}
1209	}
1210
1211	if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
1212		if (!enable_s3tc)
1213			goto out_unknown;
1214
1215		switch (format) {
1216		case PIPE_FORMAT_BPTC_RGBA_UNORM:
1217		case PIPE_FORMAT_BPTC_SRGBA:
1218			return V_008F14_IMG_DATA_FORMAT_BC7;
1219		case PIPE_FORMAT_BPTC_RGB_FLOAT:
1220		case PIPE_FORMAT_BPTC_RGB_UFLOAT:
1221			return V_008F14_IMG_DATA_FORMAT_BC6;
1222		default:
1223			goto out_unknown;
1224		}
1225	}
1226
1227	if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
1228		switch (format) {
1229		case PIPE_FORMAT_R8G8_B8G8_UNORM:
1230		case PIPE_FORMAT_G8R8_B8R8_UNORM:
1231			return V_008F14_IMG_DATA_FORMAT_GB_GR;
1232		case PIPE_FORMAT_G8R8_G8B8_UNORM:
1233		case PIPE_FORMAT_R8G8_R8B8_UNORM:
1234			return V_008F14_IMG_DATA_FORMAT_BG_RG;
1235		default:
1236			goto out_unknown;
1237		}
1238	}
1239
1240	if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
1241
1242		if (!enable_s3tc)
1243			goto out_unknown;
1244
1245		if (!util_format_s3tc_enabled) {
1246			goto out_unknown;
1247		}
1248
1249		switch (format) {
1250		case PIPE_FORMAT_DXT1_RGB:
1251		case PIPE_FORMAT_DXT1_RGBA:
1252		case PIPE_FORMAT_DXT1_SRGB:
1253		case PIPE_FORMAT_DXT1_SRGBA:
1254			return V_008F14_IMG_DATA_FORMAT_BC1;
1255		case PIPE_FORMAT_DXT3_RGBA:
1256		case PIPE_FORMAT_DXT3_SRGBA:
1257			return V_008F14_IMG_DATA_FORMAT_BC2;
1258		case PIPE_FORMAT_DXT5_RGBA:
1259		case PIPE_FORMAT_DXT5_SRGBA:
1260			return V_008F14_IMG_DATA_FORMAT_BC3;
1261		default:
1262			goto out_unknown;
1263		}
1264	}
1265
1266	if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
1267		return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
1268	} else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
1269		return V_008F14_IMG_DATA_FORMAT_10_11_11;
1270	}
1271
1272	/* R8G8Bx_SNORM - TODO CxV8U8 */
1273
1274	/* See whether the components are of the same size. */
1275	for (i = 1; i < desc->nr_channels; i++) {
1276		uniform = uniform && desc->channel[0].size == desc->channel[i].size;
1277	}
1278
1279	/* Non-uniform formats. */
1280	if (!uniform) {
1281		switch(desc->nr_channels) {
1282		case 3:
1283			if (desc->channel[0].size == 5 &&
1284			    desc->channel[1].size == 6 &&
1285			    desc->channel[2].size == 5) {
1286				return V_008F14_IMG_DATA_FORMAT_5_6_5;
1287			}
1288			goto out_unknown;
1289		case 4:
1290			if (desc->channel[0].size == 5 &&
1291			    desc->channel[1].size == 5 &&
1292			    desc->channel[2].size == 5 &&
1293			    desc->channel[3].size == 1) {
1294				return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
1295			}
1296			if (desc->channel[0].size == 10 &&
1297			    desc->channel[1].size == 10 &&
1298			    desc->channel[2].size == 10 &&
1299			    desc->channel[3].size == 2) {
1300				return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
1301			}
1302			goto out_unknown;
1303		}
1304		goto out_unknown;
1305	}
1306
1307	if (first_non_void < 0 || first_non_void > 3)
1308		goto out_unknown;
1309
1310	/* uniform formats */
1311	switch (desc->channel[first_non_void].size) {
1312	case 4:
1313		switch (desc->nr_channels) {
1314#if 0 /* Not supported for render targets */
1315		case 2:
1316			return V_008F14_IMG_DATA_FORMAT_4_4;
1317#endif
1318		case 4:
1319			return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
1320		}
1321		break;
1322	case 8:
1323		switch (desc->nr_channels) {
1324		case 1:
1325			return V_008F14_IMG_DATA_FORMAT_8;
1326		case 2:
1327			return V_008F14_IMG_DATA_FORMAT_8_8;
1328		case 4:
1329			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
1330		}
1331		break;
1332	case 16:
1333		switch (desc->nr_channels) {
1334		case 1:
1335			return V_008F14_IMG_DATA_FORMAT_16;
1336		case 2:
1337			return V_008F14_IMG_DATA_FORMAT_16_16;
1338		case 4:
1339			return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
1340		}
1341		break;
1342	case 32:
1343		switch (desc->nr_channels) {
1344		case 1:
1345			return V_008F14_IMG_DATA_FORMAT_32;
1346		case 2:
1347			return V_008F14_IMG_DATA_FORMAT_32_32;
1348#if 0 /* Not supported for render targets */
1349		case 3:
1350			return V_008F14_IMG_DATA_FORMAT_32_32_32;
1351#endif
1352		case 4:
1353			return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
1354		}
1355	}
1356
1357out_unknown:
1358	/* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
1359	return ~0;
1360}
1361
1362static unsigned si_tex_wrap(unsigned wrap)
1363{
1364	switch (wrap) {
1365	default:
1366	case PIPE_TEX_WRAP_REPEAT:
1367		return V_008F30_SQ_TEX_WRAP;
1368	case PIPE_TEX_WRAP_CLAMP:
1369		return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
1370	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1371		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
1372	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1373		return V_008F30_SQ_TEX_CLAMP_BORDER;
1374	case PIPE_TEX_WRAP_MIRROR_REPEAT:
1375		return V_008F30_SQ_TEX_MIRROR;
1376	case PIPE_TEX_WRAP_MIRROR_CLAMP:
1377		return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
1378	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1379		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
1380	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1381		return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
1382	}
1383}
1384
1385static unsigned si_tex_filter(unsigned filter)
1386{
1387	switch (filter) {
1388	default:
1389	case PIPE_TEX_FILTER_NEAREST:
1390		return V_008F38_SQ_TEX_XY_FILTER_POINT;
1391	case PIPE_TEX_FILTER_LINEAR:
1392		return V_008F38_SQ_TEX_XY_FILTER_BILINEAR;
1393	}
1394}
1395
1396static unsigned si_tex_mipfilter(unsigned filter)
1397{
1398	switch (filter) {
1399	case PIPE_TEX_MIPFILTER_NEAREST:
1400		return V_008F38_SQ_TEX_Z_FILTER_POINT;
1401	case PIPE_TEX_MIPFILTER_LINEAR:
1402		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
1403	default:
1404	case PIPE_TEX_MIPFILTER_NONE:
1405		return V_008F38_SQ_TEX_Z_FILTER_NONE;
1406	}
1407}
1408
1409static unsigned si_tex_compare(unsigned compare)
1410{
1411	switch (compare) {
1412	default:
1413	case PIPE_FUNC_NEVER:
1414		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
1415	case PIPE_FUNC_LESS:
1416		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
1417	case PIPE_FUNC_EQUAL:
1418		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
1419	case PIPE_FUNC_LEQUAL:
1420		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
1421	case PIPE_FUNC_GREATER:
1422		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
1423	case PIPE_FUNC_NOTEQUAL:
1424		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
1425	case PIPE_FUNC_GEQUAL:
1426		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
1427	case PIPE_FUNC_ALWAYS:
1428		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
1429	}
1430}
1431
1432static unsigned si_tex_dim(unsigned dim, unsigned nr_samples)
1433{
1434	switch (dim) {
1435	default:
1436	case PIPE_TEXTURE_1D:
1437		return V_008F1C_SQ_RSRC_IMG_1D;
1438	case PIPE_TEXTURE_1D_ARRAY:
1439		return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
1440	case PIPE_TEXTURE_2D:
1441	case PIPE_TEXTURE_RECT:
1442		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
1443					V_008F1C_SQ_RSRC_IMG_2D;
1444	case PIPE_TEXTURE_2D_ARRAY:
1445		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
1446					V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
1447	case PIPE_TEXTURE_3D:
1448		return V_008F1C_SQ_RSRC_IMG_3D;
1449	case PIPE_TEXTURE_CUBE:
1450	case PIPE_TEXTURE_CUBE_ARRAY:
1451		return V_008F1C_SQ_RSRC_IMG_CUBE;
1452	}
1453}
1454
1455/*
1456 * Format support testing
1457 */
1458
1459static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
1460{
1461	return si_translate_texformat(screen, format, util_format_description(format),
1462				      util_format_get_first_non_void_channel(format)) != ~0U;
1463}
1464
1465static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
1466					       const struct util_format_description *desc,
1467					       int first_non_void)
1468{
1469	unsigned type = desc->channel[first_non_void].type;
1470	int i;
1471
1472	if (type == UTIL_FORMAT_TYPE_FIXED)
1473		return V_008F0C_BUF_DATA_FORMAT_INVALID;
1474
1475	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1476		return V_008F0C_BUF_DATA_FORMAT_10_11_11;
1477
1478	if (desc->nr_channels == 4 &&
1479	    desc->channel[0].size == 10 &&
1480	    desc->channel[1].size == 10 &&
1481	    desc->channel[2].size == 10 &&
1482	    desc->channel[3].size == 2)
1483		return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
1484
1485	/* See whether the components are of the same size. */
1486	for (i = 0; i < desc->nr_channels; i++) {
1487		if (desc->channel[first_non_void].size != desc->channel[i].size)
1488			return V_008F0C_BUF_DATA_FORMAT_INVALID;
1489	}
1490
1491	switch (desc->channel[first_non_void].size) {
1492	case 8:
1493		switch (desc->nr_channels) {
1494		case 1:
1495			return V_008F0C_BUF_DATA_FORMAT_8;
1496		case 2:
1497			return V_008F0C_BUF_DATA_FORMAT_8_8;
1498		case 3:
1499		case 4:
1500			return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
1501		}
1502		break;
1503	case 16:
1504		switch (desc->nr_channels) {
1505		case 1:
1506			return V_008F0C_BUF_DATA_FORMAT_16;
1507		case 2:
1508			return V_008F0C_BUF_DATA_FORMAT_16_16;
1509		case 3:
1510		case 4:
1511			return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
1512		}
1513		break;
1514	case 32:
1515		/* From the Southern Islands ISA documentation about MTBUF:
1516		 * 'Memory reads of data in memory that is 32 or 64 bits do not
1517		 * undergo any format conversion.'
1518		 */
1519		if (type != UTIL_FORMAT_TYPE_FLOAT &&
1520		    !desc->channel[first_non_void].pure_integer)
1521			return V_008F0C_BUF_DATA_FORMAT_INVALID;
1522
1523		switch (desc->nr_channels) {
1524		case 1:
1525			return V_008F0C_BUF_DATA_FORMAT_32;
1526		case 2:
1527			return V_008F0C_BUF_DATA_FORMAT_32_32;
1528		case 3:
1529			return V_008F0C_BUF_DATA_FORMAT_32_32_32;
1530		case 4:
1531			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
1532		}
1533		break;
1534	}
1535
1536	return V_008F0C_BUF_DATA_FORMAT_INVALID;
1537}
1538
1539static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
1540					      const struct util_format_description *desc,
1541					      int first_non_void)
1542{
1543	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1544		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1545
1546	switch (desc->channel[first_non_void].type) {
1547	case UTIL_FORMAT_TYPE_SIGNED:
1548		if (desc->channel[first_non_void].normalized)
1549			return V_008F0C_BUF_NUM_FORMAT_SNORM;
1550		else if (desc->channel[first_non_void].pure_integer)
1551			return V_008F0C_BUF_NUM_FORMAT_SINT;
1552		else
1553			return V_008F0C_BUF_NUM_FORMAT_SSCALED;
1554		break;
1555	case UTIL_FORMAT_TYPE_UNSIGNED:
1556		if (desc->channel[first_non_void].normalized)
1557			return V_008F0C_BUF_NUM_FORMAT_UNORM;
1558		else if (desc->channel[first_non_void].pure_integer)
1559			return V_008F0C_BUF_NUM_FORMAT_UINT;
1560		else
1561			return V_008F0C_BUF_NUM_FORMAT_USCALED;
1562		break;
1563	case UTIL_FORMAT_TYPE_FLOAT:
1564	default:
1565		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1566	}
1567}
1568
1569static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format)
1570{
1571	const struct util_format_description *desc;
1572	int first_non_void;
1573	unsigned data_format;
1574
1575	desc = util_format_description(format);
1576	first_non_void = util_format_get_first_non_void_channel(format);
1577	data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);
1578	return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID;
1579}
1580
1581static bool si_is_colorbuffer_format_supported(enum pipe_format format)
1582{
1583	return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
1584		r600_translate_colorswap(format) != ~0U;
1585}
1586
1587static bool si_is_zs_format_supported(enum pipe_format format)
1588{
1589	return si_translate_dbformat(format) != V_028040_Z_INVALID;
1590}
1591
1592boolean si_is_format_supported(struct pipe_screen *screen,
1593                               enum pipe_format format,
1594                               enum pipe_texture_target target,
1595                               unsigned sample_count,
1596                               unsigned usage)
1597{
1598	struct si_screen *sscreen = (struct si_screen *)screen;
1599	unsigned retval = 0;
1600
1601	if (target >= PIPE_MAX_TEXTURE_TYPES) {
1602		R600_ERR("r600: unsupported texture type %d\n", target);
1603		return FALSE;
1604	}
1605
1606	if (!util_format_is_supported(format, usage))
1607		return FALSE;
1608
1609	if (sample_count > 1) {
1610		/* 2D tiling on CIK is supported since DRM 2.35.0 */
1611		if (sscreen->b.chip_class >= CIK && sscreen->b.info.drm_minor < 35)
1612			return FALSE;
1613
1614		switch (sample_count) {
1615		case 2:
1616		case 4:
1617		case 8:
1618			break;
1619		default:
1620			return FALSE;
1621		}
1622	}
1623
1624	if (usage & PIPE_BIND_SAMPLER_VIEW) {
1625		if (target == PIPE_BUFFER) {
1626			if (si_is_vertex_format_supported(screen, format))
1627				retval |= PIPE_BIND_SAMPLER_VIEW;
1628		} else {
1629			if (si_is_sampler_format_supported(screen, format))
1630				retval |= PIPE_BIND_SAMPLER_VIEW;
1631		}
1632	}
1633
1634	if ((usage & (PIPE_BIND_RENDER_TARGET |
1635		      PIPE_BIND_DISPLAY_TARGET |
1636		      PIPE_BIND_SCANOUT |
1637		      PIPE_BIND_SHARED |
1638		      PIPE_BIND_BLENDABLE)) &&
1639	    si_is_colorbuffer_format_supported(format)) {
1640		retval |= usage &
1641			  (PIPE_BIND_RENDER_TARGET |
1642			   PIPE_BIND_DISPLAY_TARGET |
1643			   PIPE_BIND_SCANOUT |
1644			   PIPE_BIND_SHARED);
1645		if (!util_format_is_pure_integer(format) &&
1646		    !util_format_is_depth_or_stencil(format))
1647			retval |= usage & PIPE_BIND_BLENDABLE;
1648	}
1649
1650	if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
1651	    si_is_zs_format_supported(format)) {
1652		retval |= PIPE_BIND_DEPTH_STENCIL;
1653	}
1654
1655	if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
1656	    si_is_vertex_format_supported(screen, format)) {
1657		retval |= PIPE_BIND_VERTEX_BUFFER;
1658	}
1659
1660	if (usage & PIPE_BIND_TRANSFER_READ)
1661		retval |= PIPE_BIND_TRANSFER_READ;
1662	if (usage & PIPE_BIND_TRANSFER_WRITE)
1663		retval |= PIPE_BIND_TRANSFER_WRITE;
1664
1665	return retval == usage;
1666}
1667
1668unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil)
1669{
1670	unsigned tile_mode_index = 0;
1671
1672	if (stencil) {
1673		tile_mode_index = rtex->surface.stencil_tiling_index[level];
1674	} else {
1675		tile_mode_index = rtex->surface.tiling_index[level];
1676	}
1677	return tile_mode_index;
1678}
1679
1680/*
1681 * framebuffer handling
1682 */
1683
1684static void si_initialize_color_surface(struct si_context *sctx,
1685					struct r600_surface *surf)
1686{
1687	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
1688	unsigned level = surf->base.u.tex.level;
1689	uint64_t offset = rtex->surface.level[level].offset;
1690	unsigned pitch, slice;
1691	unsigned color_info, color_attrib, color_pitch, color_view;
1692	unsigned tile_mode_index;
1693	unsigned format, swap, ntype, endian;
1694	const struct util_format_description *desc;
1695	int i;
1696	unsigned blend_clamp = 0, blend_bypass = 0;
1697	unsigned max_comp_size;
1698
1699	/* Layered rendering doesn't work with LINEAR_GENERAL.
1700	 * (LINEAR_ALIGNED and others work) */
1701	if (rtex->surface.level[level].mode == RADEON_SURF_MODE_LINEAR) {
1702		assert(surf->base.u.tex.first_layer == surf->base.u.tex.last_layer);
1703		offset += rtex->surface.level[level].slice_size *
1704			  surf->base.u.tex.first_layer;
1705		color_view = 0;
1706	} else {
1707		color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
1708			     S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
1709	}
1710
1711	pitch = (rtex->surface.level[level].nblk_x) / 8 - 1;
1712	slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64;
1713	if (slice) {
1714		slice = slice - 1;
1715	}
1716
1717	tile_mode_index = si_tile_mode_index(rtex, level, false);
1718
1719	desc = util_format_description(surf->base.format);
1720	for (i = 0; i < 4; i++) {
1721		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
1722			break;
1723		}
1724	}
1725	if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
1726		ntype = V_028C70_NUMBER_FLOAT;
1727	} else {
1728		ntype = V_028C70_NUMBER_UNORM;
1729		if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
1730			ntype = V_028C70_NUMBER_SRGB;
1731		else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
1732			if (desc->channel[i].pure_integer) {
1733				ntype = V_028C70_NUMBER_SINT;
1734			} else {
1735				assert(desc->channel[i].normalized);
1736				ntype = V_028C70_NUMBER_SNORM;
1737			}
1738		} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
1739			if (desc->channel[i].pure_integer) {
1740				ntype = V_028C70_NUMBER_UINT;
1741			} else {
1742				assert(desc->channel[i].normalized);
1743				ntype = V_028C70_NUMBER_UNORM;
1744			}
1745		}
1746	}
1747
1748	format = si_translate_colorformat(surf->base.format);
1749	if (format == V_028C70_COLOR_INVALID) {
1750		R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
1751	}
1752	assert(format != V_028C70_COLOR_INVALID);
1753	swap = r600_translate_colorswap(surf->base.format);
1754	if (rtex->resource.b.b.usage == PIPE_USAGE_STAGING) {
1755		endian = V_028C70_ENDIAN_NONE;
1756	} else {
1757		endian = si_colorformat_endian_swap(format);
1758	}
1759
1760	/* blend clamp should be set for all NORM/SRGB types */
1761	if (ntype == V_028C70_NUMBER_UNORM ||
1762	    ntype == V_028C70_NUMBER_SNORM ||
1763	    ntype == V_028C70_NUMBER_SRGB)
1764		blend_clamp = 1;
1765
1766	/* set blend bypass according to docs if SINT/UINT or
1767	   8/24 COLOR variants */
1768	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
1769	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
1770	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
1771		blend_clamp = 0;
1772		blend_bypass = 1;
1773	}
1774
1775	color_info = S_028C70_FORMAT(format) |
1776		S_028C70_COMP_SWAP(swap) |
1777		S_028C70_BLEND_CLAMP(blend_clamp) |
1778		S_028C70_BLEND_BYPASS(blend_bypass) |
1779		S_028C70_NUMBER_TYPE(ntype) |
1780		S_028C70_ENDIAN(endian);
1781
1782	color_pitch = S_028C64_TILE_MAX(pitch);
1783
1784	color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) |
1785		S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1);
1786
1787	if (rtex->resource.b.b.nr_samples > 1) {
1788		unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
1789
1790		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
1791				S_028C74_NUM_FRAGMENTS(log_samples);
1792
1793		if (rtex->fmask.size) {
1794			color_info |= S_028C70_COMPRESSION(1);
1795			unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);
1796
1797			color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index);
1798
1799			if (sctx->b.chip_class == SI) {
1800				/* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
1801				color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
1802			}
1803			if (sctx->b.chip_class >= CIK) {
1804				color_pitch |= S_028C64_FMASK_TILE_MAX(rtex->fmask.pitch / 8 - 1);
1805			}
1806		}
1807	}
1808
1809	offset += rtex->resource.gpu_address;
1810
1811	surf->cb_color_base = offset >> 8;
1812	surf->cb_color_pitch = color_pitch;
1813	surf->cb_color_slice = S_028C68_TILE_MAX(slice);
1814	surf->cb_color_view = color_view;
1815	surf->cb_color_info = color_info;
1816	surf->cb_color_attrib = color_attrib;
1817
1818	if (rtex->fmask.size) {
1819		surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8;
1820		surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
1821	} else {
1822		/* This must be set for fast clear to work without FMASK. */
1823		surf->cb_color_fmask = surf->cb_color_base;
1824		surf->cb_color_fmask_slice = surf->cb_color_slice;
1825		surf->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
1826
1827		if (sctx->b.chip_class == SI) {
1828			unsigned bankh = util_logbase2(rtex->surface.bankh);
1829			surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
1830		}
1831
1832		if (sctx->b.chip_class >= CIK) {
1833			surf->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch);
1834		}
1835	}
1836
1837	/* Determine pixel shader export format */
1838	max_comp_size = si_colorformat_max_comp_size(format);
1839	if (ntype == V_028C70_NUMBER_SRGB ||
1840	    ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
1841	     max_comp_size <= 10) ||
1842	    (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) {
1843		surf->export_16bpc = true;
1844	}
1845
1846	surf->color_initialized = true;
1847}
1848
1849static void si_init_depth_surface(struct si_context *sctx,
1850				  struct r600_surface *surf)
1851{
1852	struct si_screen *sscreen = sctx->screen;
1853	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
1854	unsigned level = surf->base.u.tex.level;
1855	struct radeon_surface_level *levelinfo = &rtex->surface.level[level];
1856	unsigned format, tile_mode_index, array_mode;
1857	unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config;
1858	uint32_t z_info, s_info, db_depth_info;
1859	uint64_t z_offs, s_offs;
1860	uint32_t db_htile_data_base, db_htile_surface, pa_su_poly_offset_db_fmt_cntl = 0;
1861
1862	switch (sctx->framebuffer.state.zsbuf->texture->format) {
1863	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1864	case PIPE_FORMAT_X8Z24_UNORM:
1865	case PIPE_FORMAT_Z24X8_UNORM:
1866	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1867		pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
1868		break;
1869	case PIPE_FORMAT_Z32_FLOAT:
1870	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1871		pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
1872						S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
1873		break;
1874	case PIPE_FORMAT_Z16_UNORM:
1875		pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
1876		break;
1877	default:
1878		assert(0);
1879	}
1880
1881	format = si_translate_dbformat(rtex->resource.b.b.format);
1882
1883	if (format == V_028040_Z_INVALID) {
1884		R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
1885	}
1886	assert(format != V_028040_Z_INVALID);
1887
1888	s_offs = z_offs = rtex->resource.gpu_address;
1889	z_offs += rtex->surface.level[level].offset;
1890	s_offs += rtex->surface.stencil_level[level].offset;
1891
1892	db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
1893
1894	z_info = S_028040_FORMAT(format);
1895	if (rtex->resource.b.b.nr_samples > 1) {
1896		z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
1897	}
1898
1899	if (rtex->surface.flags & RADEON_SURF_SBUFFER)
1900		s_info = S_028044_FORMAT(V_028044_STENCIL_8);
1901	else
1902		s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
1903
1904	if (sctx->b.chip_class >= CIK) {
1905		switch (rtex->surface.level[level].mode) {
1906		case RADEON_SURF_MODE_2D:
1907			array_mode = V_02803C_ARRAY_2D_TILED_THIN1;
1908			break;
1909		case RADEON_SURF_MODE_1D:
1910		case RADEON_SURF_MODE_LINEAR_ALIGNED:
1911		case RADEON_SURF_MODE_LINEAR:
1912		default:
1913			array_mode = V_02803C_ARRAY_1D_TILED_THIN1;
1914			break;
1915		}
1916		tile_split = rtex->surface.tile_split;
1917		stile_split = rtex->surface.stencil_tile_split;
1918		macro_aspect = rtex->surface.mtilea;
1919		bankw = rtex->surface.bankw;
1920		bankh = rtex->surface.bankh;
1921		tile_split = cik_tile_split(tile_split);
1922		stile_split = cik_tile_split(stile_split);
1923		macro_aspect = cik_macro_tile_aspect(macro_aspect);
1924		bankw = cik_bank_wh(bankw);
1925		bankh = cik_bank_wh(bankh);
1926		nbanks = si_num_banks(sscreen, rtex);
1927		tile_mode_index = si_tile_mode_index(rtex, level, false);
1928		pipe_config = cik_db_pipe_config(sscreen, tile_mode_index);
1929
1930		db_depth_info |= S_02803C_ARRAY_MODE(array_mode) |
1931			S_02803C_PIPE_CONFIG(pipe_config) |
1932			S_02803C_BANK_WIDTH(bankw) |
1933			S_02803C_BANK_HEIGHT(bankh) |
1934			S_02803C_MACRO_TILE_ASPECT(macro_aspect) |
1935			S_02803C_NUM_BANKS(nbanks);
1936		z_info |= S_028040_TILE_SPLIT(tile_split);
1937		s_info |= S_028044_TILE_SPLIT(stile_split);
1938	} else {
1939		tile_mode_index = si_tile_mode_index(rtex, level, false);
1940		z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
1941		tile_mode_index = si_tile_mode_index(rtex, level, true);
1942		s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
1943	}
1944
1945	/* HiZ aka depth buffer htile */
1946	/* use htile only for first level */
1947	if (rtex->htile_buffer && !level) {
1948		z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
1949			  S_028040_ALLOW_EXPCLEAR(1);
1950
1951		/* This is optimal for the clear value of 1.0 and using
1952		 * the LESS and LEQUAL test functions. Set this to 0
1953		 * for the opposite case. This can only be changed when
1954		 * clearing. */
1955		z_info |= S_028040_ZRANGE_PRECISION(1);
1956
1957		/* Use all of the htile_buffer for depth, because we don't
1958		 * use HTILE for stencil because of FAST_STENCIL_DISABLE. */
1959		s_info |= S_028044_TILE_STENCIL_DISABLE(1);
1960
1961		uint64_t va = rtex->htile_buffer->gpu_address;
1962		db_htile_data_base = va >> 8;
1963		db_htile_surface = S_028ABC_FULL_CACHE(1);
1964	} else {
1965		db_htile_data_base = 0;
1966		db_htile_surface = 0;
1967	}
1968
1969	assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
1970
1971	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
1972			      S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
1973	surf->db_htile_data_base = db_htile_data_base;
1974	surf->db_depth_info = db_depth_info;
1975	surf->db_z_info = z_info;
1976	surf->db_stencil_info = s_info;
1977	surf->db_depth_base = z_offs >> 8;
1978	surf->db_stencil_base = s_offs >> 8;
1979	surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
1980			      S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
1981	surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
1982							levelinfo->nblk_y) / 64 - 1);
1983	surf->db_htile_surface = db_htile_surface;
1984	surf->pa_su_poly_offset_db_fmt_cntl = pa_su_poly_offset_db_fmt_cntl;
1985
1986	surf->depth_initialized = true;
1987}
1988
1989static void si_set_framebuffer_state(struct pipe_context *ctx,
1990				     const struct pipe_framebuffer_state *state)
1991{
1992	struct si_context *sctx = (struct si_context *)ctx;
1993	struct pipe_constant_buffer constbuf = {0};
1994	struct r600_surface *surf = NULL;
1995	struct r600_texture *rtex;
1996	bool old_cb0_is_integer = sctx->framebuffer.cb0_is_integer;
1997	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
1998	int i;
1999
2000	/* Only flush TC when changing the framebuffer state, because
2001	 * the only client not using TC that can change textures is
2002	 * the framebuffer.
2003	 *
2004	 * Flush all CB and DB caches here because all buffers can be used
2005	 * for write by both TC (with shader image stores) and CB/DB.
2006	 */
2007	sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
2008			 SI_CONTEXT_INV_TC_L2 |
2009			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
2010
2011	util_copy_framebuffer_state(&sctx->framebuffer.state, state);
2012
2013	sctx->framebuffer.export_16bpc = 0;
2014	sctx->framebuffer.compressed_cb_mask = 0;
2015	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
2016	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
2017	sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] &&
2018				  util_format_is_pure_integer(state->cbufs[0]->format);
2019
2020	if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer)
2021		sctx->db_render_state.dirty = true;
2022
2023	for (i = 0; i < state->nr_cbufs; i++) {
2024		if (!state->cbufs[i])
2025			continue;
2026
2027		surf = (struct r600_surface*)state->cbufs[i];
2028		rtex = (struct r600_texture*)surf->base.texture;
2029
2030		if (!surf->color_initialized) {
2031			si_initialize_color_surface(sctx, surf);
2032		}
2033
2034		if (surf->export_16bpc) {
2035			sctx->framebuffer.export_16bpc |= 1 << i;
2036		}
2037
2038		if (rtex->fmask.size && rtex->cmask.size) {
2039			sctx->framebuffer.compressed_cb_mask |= 1 << i;
2040		}
2041	}
2042	/* Set the 16BPC export for possible dual-src blending. */
2043	if (i == 1 && surf && surf->export_16bpc) {
2044		sctx->framebuffer.export_16bpc |= 1 << 1;
2045	}
2046
2047	assert(!(sctx->framebuffer.export_16bpc & ~0xff));
2048
2049	if (state->zsbuf) {
2050		surf = (struct r600_surface*)state->zsbuf;
2051
2052		if (!surf->depth_initialized) {
2053			si_init_depth_surface(sctx, surf);
2054		}
2055	}
2056
2057	si_update_fb_rs_state(sctx);
2058	si_update_fb_blend_state(sctx);
2059
2060	sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3;
2061	sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4;
2062	sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */
2063	sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */
2064	sctx->framebuffer.atom.dirty = true;
2065
2066	if (sctx->framebuffer.nr_samples != old_nr_samples) {
2067		sctx->msaa_config.dirty = true;
2068		sctx->db_render_state.dirty = true;
2069
2070		/* Set sample locations as fragment shader constants. */
2071		switch (sctx->framebuffer.nr_samples) {
2072		case 1:
2073			constbuf.user_buffer = sctx->b.sample_locations_1x;
2074			break;
2075		case 2:
2076			constbuf.user_buffer = sctx->b.sample_locations_2x;
2077			break;
2078		case 4:
2079			constbuf.user_buffer = sctx->b.sample_locations_4x;
2080			break;
2081		case 8:
2082			constbuf.user_buffer = sctx->b.sample_locations_8x;
2083			break;
2084		case 16:
2085			constbuf.user_buffer = sctx->b.sample_locations_16x;
2086			break;
2087		default:
2088			assert(0);
2089		}
2090		constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
2091		ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
2092					 SI_DRIVER_STATE_CONST_BUF, &constbuf);
2093
2094		/* Smoothing (only possible with nr_samples == 1) uses the same
2095		 * sample locations as the MSAA it simulates.
2096		 *
2097		 * Therefore, don't update the sample locations when
2098		 * transitioning from no AA to smoothing-equivalent AA, and
2099		 * vice versa.
2100		 */
2101		if ((sctx->framebuffer.nr_samples != 1 ||
2102		     old_nr_samples != SI_NUM_SMOOTH_AA_SAMPLES) &&
2103		    (sctx->framebuffer.nr_samples != SI_NUM_SMOOTH_AA_SAMPLES ||
2104		     old_nr_samples != 1))
2105			sctx->msaa_sample_locs.dirty = true;
2106	}
2107}
2108
2109static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
2110{
2111	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
2112	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
2113	unsigned i, nr_cbufs = state->nr_cbufs;
2114	struct r600_texture *tex = NULL;
2115	struct r600_surface *cb = NULL;
2116
2117	/* Colorbuffers. */
2118	for (i = 0; i < nr_cbufs; i++) {
2119		cb = (struct r600_surface*)state->cbufs[i];
2120		if (!cb) {
2121			r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
2122					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
2123			continue;
2124		}
2125
2126		tex = (struct r600_texture *)cb->base.texture;
2127		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
2128				      &tex->resource, RADEON_USAGE_READWRITE,
2129				      tex->surface.nsamples > 1 ?
2130					      RADEON_PRIO_COLOR_BUFFER_MSAA :
2131					      RADEON_PRIO_COLOR_BUFFER);
2132
2133		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
2134			r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
2135				tex->cmask_buffer, RADEON_USAGE_READWRITE,
2136				RADEON_PRIO_COLOR_META);
2137		}
2138
2139		r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13);
2140		radeon_emit(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
2141		radeon_emit(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
2142		radeon_emit(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
2143		radeon_emit(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
2144		radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */
2145		radeon_emit(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
2146		radeon_emit(cs, 0);			/* R_028C78 unused */
2147		radeon_emit(cs, tex->cmask.base_address_reg);	/* R_028C7C_CB_COLOR0_CMASK */
2148		radeon_emit(cs, tex->cmask.slice_tile_max);	/* R_028C80_CB_COLOR0_CMASK_SLICE */
2149		radeon_emit(cs, cb->cb_color_fmask);		/* R_028C84_CB_COLOR0_FMASK */
2150		radeon_emit(cs, cb->cb_color_fmask_slice);	/* R_028C88_CB_COLOR0_FMASK_SLICE */
2151		radeon_emit(cs, tex->color_clear_value[0]);	/* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
2152		radeon_emit(cs, tex->color_clear_value[1]);	/* R_028C90_CB_COLOR0_CLEAR_WORD1 */
2153	}
2154	/* set CB_COLOR1_INFO for possible dual-src blending */
2155	if (i == 1 && state->cbufs[0]) {
2156		r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
2157				       cb->cb_color_info | tex->cb_color_info);
2158		i++;
2159	}
2160	for (; i < 8 ; i++) {
2161		r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
2162	}
2163
2164	/* ZS buffer. */
2165	if (state->zsbuf) {
2166		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
2167		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
2168
2169		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
2170				      &rtex->resource, RADEON_USAGE_READWRITE,
2171				      zb->base.texture->nr_samples > 1 ?
2172					      RADEON_PRIO_DEPTH_BUFFER_MSAA :
2173					      RADEON_PRIO_DEPTH_BUFFER);
2174
2175		if (zb->db_htile_data_base) {
2176			r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
2177					      rtex->htile_buffer, RADEON_USAGE_READWRITE,
2178					      RADEON_PRIO_DEPTH_META);
2179		}
2180
2181		r600_write_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
2182		r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
2183
2184		r600_write_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
2185		radeon_emit(cs, zb->db_depth_info);	/* R_02803C_DB_DEPTH_INFO */
2186		radeon_emit(cs, zb->db_z_info);		/* R_028040_DB_Z_INFO */
2187		radeon_emit(cs, zb->db_stencil_info);	/* R_028044_DB_STENCIL_INFO */
2188		radeon_emit(cs, zb->db_depth_base);	/* R_028048_DB_Z_READ_BASE */
2189		radeon_emit(cs, zb->db_stencil_base);	/* R_02804C_DB_STENCIL_READ_BASE */
2190		radeon_emit(cs, zb->db_depth_base);	/* R_028050_DB_Z_WRITE_BASE */
2191		radeon_emit(cs, zb->db_stencil_base);	/* R_028054_DB_STENCIL_WRITE_BASE */
2192		radeon_emit(cs, zb->db_depth_size);	/* R_028058_DB_DEPTH_SIZE */
2193		radeon_emit(cs, zb->db_depth_slice);	/* R_02805C_DB_DEPTH_SLICE */
2194
2195		r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
2196		r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
2197		r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
2198				       zb->pa_su_poly_offset_db_fmt_cntl);
2199	} else {
2200		r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
2201		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
2202		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
2203	}
2204
2205	/* Framebuffer dimensions. */
2206        /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
2207	r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
2208			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
2209}
2210
2211static void si_emit_msaa_sample_locs(struct r600_common_context *rctx,
2212				     struct r600_atom *atom)
2213{
2214	struct si_context *sctx = (struct si_context *)rctx;
2215	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
2216	unsigned nr_samples = sctx->framebuffer.nr_samples;
2217
2218	cayman_emit_msaa_sample_locs(cs, nr_samples > 1 ? nr_samples :
2219						SI_NUM_SMOOTH_AA_SAMPLES);
2220}
2221
2222const struct r600_atom si_atom_msaa_sample_locs = { si_emit_msaa_sample_locs, 18 }; /* number of CS dwords */
2223
2224static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_atom *atom)
2225{
2226	struct si_context *sctx = (struct si_context *)rctx;
2227	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
2228
2229	cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
2230				sctx->ps_iter_samples,
2231				sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0);
2232}
2233
2234const struct r600_atom si_atom_msaa_config = { si_emit_msaa_config, 10 }; /* number of CS dwords */
2235
2236static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
2237{
2238	struct si_context *sctx = (struct si_context *)ctx;
2239
2240	if (sctx->ps_iter_samples == min_samples)
2241		return;
2242
2243	sctx->ps_iter_samples = min_samples;
2244
2245	if (sctx->framebuffer.nr_samples > 1)
2246		sctx->msaa_config.dirty = true;
2247}
2248
2249/*
2250 * Samplers
2251 */
2252
/*
 * Create a sampler view for "texture" from the template "state".
 *
 * Three cases are handled, in this order:
 *  - NULL texture: only state[3] is written (swizzle from the template and
 *    a 1D image type) and the view is returned without referencing any
 *    resource.
 *  - PIPE_BUFFER target: a buffer descriptor is written to state[4..7] and
 *    the view is appended to sctx->b.texture_buffers.
 *  - any other target: an image descriptor is written to state[0..7]; when
 *    the texture has an FMASK (tmp->fmask.size != 0) a second descriptor is
 *    written to fmask_state[0..7].
 *
 * Returns the new view with its reference count set to 1, or NULL when the
 * view cannot be allocated.
 */
2253static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx,
2254							struct pipe_resource *texture,
2255							const struct pipe_sampler_view *state)
2256{
2257	struct si_context *sctx = (struct si_context*)ctx;
2258	struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
2259	struct r600_texture *tmp = (struct r600_texture*)texture;
2260	const struct util_format_description *desc;
2261	unsigned format, num_format;
2262	uint32_t pitch = 0;
2263	unsigned char state_swizzle[4], swizzle[4];
2264	unsigned height, depth, width;
2265	enum pipe_format pipe_format = state->format;
2266	struct radeon_surface_level *surflevel;
2267	int first_non_void;
2268	uint64_t va;
2269
2270	if (view == NULL)
2271		return NULL;
2272
2273	/* initialize base object */
2274	view->base = *state;
2275	view->base.texture = NULL;
2276	view->base.reference.count = 1;
2277	view->base.context = ctx;
2278
2279	/* NULL resource, obey swizzle (only ZERO and ONE make sense). */
2280	if (!texture) {
2281		view->state[3] = S_008F1C_DST_SEL_X(si_map_swizzle(state->swizzle_r)) |
2282				 S_008F1C_DST_SEL_Y(si_map_swizzle(state->swizzle_g)) |
2283				 S_008F1C_DST_SEL_Z(si_map_swizzle(state->swizzle_b)) |
2284				 S_008F1C_DST_SEL_W(si_map_swizzle(state->swizzle_a)) |
2285				 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D);
2286		return &view->base;
2287	}
2288
	/* From here on the view holds a reference on the texture. */
2289	pipe_resource_reference(&view->base.texture, texture);
2290	view->resource = &tmp->resource;
2291
2292	/* Buffer resource. */
2293	if (texture->target == PIPE_BUFFER) {
2294		unsigned stride;
2295
2296		desc = util_format_description(state->format);
2297		first_non_void = util_format_get_first_non_void_channel(state->format);
		/* The element stride is the size of one format block in bytes. */
2298		stride = desc->block.bits / 8;
2299		va = tmp->resource.gpu_address + state->u.buf.first_element*stride;
2300		format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
2301		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
2302
2303		view->state[4] = va;
2304		view->state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
2305				 S_008F04_STRIDE(stride);
		/* Number of elements covered by the view (inclusive range). */
2306		view->state[6] = state->u.buf.last_element + 1 - state->u.buf.first_element;
2307		view->state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
2308				 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
2309				 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
2310				 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
2311				 S_008F0C_NUM_FORMAT(num_format) |
2312				 S_008F0C_DATA_FORMAT(format);
2313
		/* Buffer views are tracked in a per-context list; they are
		 * removed from it again in si_sampler_view_destroy(). */
2314		LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers);
2315		return &view->base;
2316	}
2317
2318	state_swizzle[0] = state->swizzle_r;
2319	state_swizzle[1] = state->swizzle_g;
2320	state_swizzle[2] = state->swizzle_b;
2321	state_swizzle[3] = state->swizzle_a;
2322
2323	surflevel = tmp->surface.level;
2324
2325	/* Texturing with separate depth and stencil. */
2326	if (tmp->is_depth && !tmp->is_flushing_texture) {
2327		switch (pipe_format) {
2328		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2329			pipe_format = PIPE_FORMAT_Z32_FLOAT;
2330			break;
2331		case PIPE_FORMAT_X8Z24_UNORM:
2332		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2333			/* Z24 is always stored like this. */
2334			pipe_format = PIPE_FORMAT_Z24X8_UNORM;
2335			break;
2336		case PIPE_FORMAT_X24S8_UINT:
2337		case PIPE_FORMAT_S8X24_UINT:
2338		case PIPE_FORMAT_X32_S8X24_UINT:
			/* Stencil-only views sample from the stencil levels. */
2339			pipe_format = PIPE_FORMAT_S8_UINT;
2340			surflevel = tmp->surface.stencil_level;
2341			break;
2342		default:;
2343		}
2344	}
2345
2346	desc = util_format_description(pipe_format);
2347
	/* Depth/stencil formats replicate a single channel (X or Y, depending
	 * on the format) before the view's own swizzle is applied. */
2348	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
2349		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
2350		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
2351
2352		switch (pipe_format) {
2353		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2354		case PIPE_FORMAT_X24S8_UINT:
2355		case PIPE_FORMAT_X32_S8X24_UINT:
2356		case PIPE_FORMAT_X8Z24_UNORM:
2357			util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
2358			break;
2359		default:
2360			util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
2361		}
2362	} else {
2363		util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
2364	}
2365
2366	first_non_void = util_format_get_first_non_void_channel(pipe_format);
2367
	/* Pick the numeric format (UNORM/SNORM/FLOAT/SRGB/...) of the image. */
2368	switch (pipe_format) {
2369	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2370		num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2371		break;
2372	default:
2373		if (first_non_void < 0) {
2374			if (util_format_is_compressed(pipe_format)) {
2375				switch (pipe_format) {
2376				case PIPE_FORMAT_DXT1_SRGB:
2377				case PIPE_FORMAT_DXT1_SRGBA:
2378				case PIPE_FORMAT_DXT3_SRGBA:
2379				case PIPE_FORMAT_DXT5_SRGBA:
2380				case PIPE_FORMAT_BPTC_SRGBA:
2381					num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2382					break;
2383				case PIPE_FORMAT_RGTC1_SNORM:
2384				case PIPE_FORMAT_LATC1_SNORM:
2385				case PIPE_FORMAT_RGTC2_SNORM:
2386				case PIPE_FORMAT_LATC2_SNORM:
2387				/* implies float, so use SNORM/UNORM to determine
2388				   whether data is signed or not */
2389				case PIPE_FORMAT_BPTC_RGB_FLOAT:
2390					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2391					break;
2392				default:
2393					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2394					break;
2395				}
2396			} else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
2397				num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2398			} else {
2399				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2400			}
2401		} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
2402			num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2403		} else {
			/* UNORM is the fallback for channel types not listed below. */
2404			num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2405
2406			switch (desc->channel[first_non_void].type) {
2407			case UTIL_FORMAT_TYPE_FLOAT:
2408				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2409				break;
2410			case UTIL_FORMAT_TYPE_SIGNED:
2411				if (desc->channel[first_non_void].normalized)
2412					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2413				else if (desc->channel[first_non_void].pure_integer)
2414					num_format = V_008F14_IMG_NUM_FORMAT_SINT;
2415				else
2416					num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
2417				break;
2418			case UTIL_FORMAT_TYPE_UNSIGNED:
2419				if (desc->channel[first_non_void].normalized)
2420					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2421				else if (desc->channel[first_non_void].pure_integer)
2422					num_format = V_008F14_IMG_NUM_FORMAT_UINT;
2423				else
2424					num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
2425			}
2426		}
2427	}
2428
	/* NOTE(review): ~0 presumably means "no matching hardware format";
	 * it is replaced by data format 0 here - confirm against
	 * si_translate_texformat(). */
2429	format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void);
2430	if (format == ~0) {
2431		format = 0;
2432	}
2433
2434	/* not supported any more */
2435	//endian = si_colorformat_endian_swap(format);
2436
	/* Dimensions always come from level 0 of the selected surface. */
2437	width = surflevel[0].npix_x;
2438	height = surflevel[0].npix_y;
2439	depth = surflevel[0].npix_z;
2440	pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format);
2441
	/* For array targets the depth field carries the layer count
	 * (number of cubes for cube arrays). */
2442	if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
2443	        height = 1;
2444		depth = texture->array_size;
2445	} else if (texture->target == PIPE_TEXTURE_2D_ARRAY) {
2446		depth = texture->array_size;
2447	} else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY)
2448		depth = texture->array_size / 6;
2449
2450	va = tmp->resource.gpu_address + surflevel[0].offset;
2451	va += tmp->mipmap_shift * surflevel[texture->last_level].slice_size * tmp->surface.array_size;
2452
2453	view->state[0] = va >> 8;
2454	view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) |
2455			  S_008F14_DATA_FORMAT(format) |
2456			  S_008F14_NUM_FORMAT(num_format));
2457	view->state[2] = (S_008F18_WIDTH(width - 1) |
2458			  S_008F18_HEIGHT(height - 1));
	/* For multisampled textures BASE_LEVEL/LAST_LEVEL are set to 0 and
	 * log2(nr_samples) instead of the view's mip level range. */
2459	view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
2460			  S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
2461			  S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
2462			  S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
2463			  S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ?
2464						      0 : state->u.tex.first_level - tmp->mipmap_shift) |
2465			  S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ?
2466						      util_logbase2(texture->nr_samples) :
2467						      state->u.tex.last_level - tmp->mipmap_shift) |
2468			  S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, 0, false)) |
2469			  S_008F1C_POW2_PAD(texture->last_level > 0) |
2470			  S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples)));
2471	view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
2472	view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
2473			  S_008F24_LAST_ARRAY(state->u.tex.last_layer));
2474	view->state[6] = 0;
2475	view->state[7] = 0;
2476
2477	/* Initialize the sampler view for FMASK. */
2478	if (tmp->fmask.size) {
		/* This va shadows the outer one and points at the FMASK data. */
2479		uint64_t va = tmp->resource.gpu_address + tmp->fmask.offset;
2480		uint32_t fmask_format;
2481
2482		switch (texture->nr_samples) {
2483		case 2:
2484			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
2485			break;
2486		case 4:
2487			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
2488			break;
2489		case 8:
2490			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
2491			break;
2492		default:
2493			assert(0);
2494			fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
2495		}
2496
2497		view->fmask_state[0] = va >> 8;
2498		view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
2499				       S_008F14_DATA_FORMAT(fmask_format) |
2500				       S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
2501		view->fmask_state[2] = S_008F18_WIDTH(width - 1) |
2502				       S_008F18_HEIGHT(height - 1);
		/* Every destination channel selects X. */
2503		view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
2504				       S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
2505				       S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
2506				       S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
2507				       S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) |
2508				       S_008F1C_TYPE(si_tex_dim(texture->target, 0));
2509		view->fmask_state[4] = S_008F20_DEPTH(depth - 1) |
2510				       S_008F20_PITCH(tmp->fmask.pitch - 1);
2511		view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
2512				       S_008F24_LAST_ARRAY(state->u.tex.last_layer);
2513		view->fmask_state[6] = 0;
2514		view->fmask_state[7] = 0;
2515	}
2516
2517	return &view->base;
2518}
2519
2520static void si_sampler_view_destroy(struct pipe_context *ctx,
2521				    struct pipe_sampler_view *state)
2522{
2523	struct si_sampler_view *view = (struct si_sampler_view *)state;
2524
2525	if (view->resource && view->resource->b.b.target == PIPE_BUFFER)
2526		LIST_DELINIT(&view->list);
2527
2528	pipe_resource_reference(&state->texture, NULL);
2529	FREE(view);
2530}
2531
2532static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
2533{
2534	return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
2535	       wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
2536	       (linear_filter &&
2537	        (wrap == PIPE_TEX_WRAP_CLAMP ||
2538		 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
2539}
2540
2541static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
2542{
2543	bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
2544			     state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
2545
2546	return (state->border_color.ui[0] || state->border_color.ui[1] ||
2547		state->border_color.ui[2] || state->border_color.ui[3]) &&
2548	       (wrap_mode_uses_border_color(state->wrap_s, linear_filter) ||
2549		wrap_mode_uses_border_color(state->wrap_t, linear_filter) ||
2550		wrap_mode_uses_border_color(state->wrap_r, linear_filter));
2551}
2552
2553static void *si_create_sampler_state(struct pipe_context *ctx,
2554				     const struct pipe_sampler_state *state)
2555{
2556	struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
2557	unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
2558	unsigned border_color_type;
2559
2560	if (rstate == NULL) {
2561		return NULL;
2562	}
2563
2564	if (sampler_state_needs_border_color(state))
2565		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
2566	else
2567		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2568
2569	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
2570			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
2571			  S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
2572			  r600_tex_aniso_filter(state->max_anisotropy) << 9 |
2573			  S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
2574			  S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
2575			  S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map));
2576	rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
2577			  S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)));
2578	rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
2579			  S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
2580			  S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
2581			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
2582	rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);
2583
2584	if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
2585		memcpy(rstate->border_color, state->border_color.ui,
2586		       sizeof(rstate->border_color));
2587	}
2588
2589	return rstate;
2590}
2591
2592/* Upload border colors and update the pointers in resource descriptors.
2593 * There can only be 4096 border colors per context.
2594 *
2595 * XXX: This is broken if the buffer gets reallocated.
2596 */
2597static void si_set_border_colors(struct si_context *sctx, unsigned count,
2598				 void **states)
2599{
2600	struct si_sampler_state **rstates = (struct si_sampler_state **)states;
2601	uint32_t *border_color_table = NULL;
2602	int i, j;
2603
2604	for (i = 0; i < count; i++) {
2605		if (rstates[i] &&
2606		    G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) ==
2607		    V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
2608			if (!sctx->border_color_table ||
2609			    ((sctx->border_color_offset + count - i) &
2610			     C_008F3C_BORDER_COLOR_PTR)) {
2611				r600_resource_reference(&sctx->border_color_table, NULL);
2612				sctx->border_color_offset = 0;
2613
2614				sctx->border_color_table =
2615					si_resource_create_custom(&sctx->screen->b.b,
2616								  PIPE_USAGE_DYNAMIC,
2617								  4096 * 4 * 4);
2618			}
2619
2620			if (!border_color_table) {
2621			        border_color_table =
2622					sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf,
2623							     sctx->b.rings.gfx.cs,
2624							     PIPE_TRANSFER_WRITE |
2625							     PIPE_TRANSFER_UNSYNCHRONIZED);
2626			}
2627
2628			for (j = 0; j < 4; j++) {
2629				border_color_table[4 * sctx->border_color_offset + j] =
2630					util_le32_to_cpu(rstates[i]->border_color[j]);
2631			}
2632
2633			rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR;
2634			rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++);
2635		}
2636	}
2637
2638	if (border_color_table) {
2639		struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
2640
2641		uint64_t va_offset = sctx->border_color_table->gpu_address;
2642
2643		si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8);
2644		if (sctx->b.chip_class >= CIK)
2645			si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40);
2646		si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ,
2647			      RADEON_PRIO_SHADER_DATA);
2648		si_pm4_set_state(sctx, ta_bordercolor_base, pm4);
2649	}
2650}
2651
2652static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
2653                                   unsigned start, unsigned count,
2654                                   void **states)
2655{
2656	struct si_context *sctx = (struct si_context *)ctx;
2657
2658	if (!count || shader >= SI_NUM_SHADERS)
2659		return;
2660
2661	si_set_border_colors(sctx, count, states);
2662	si_set_sampler_descriptors(sctx, shader, start, count, states);
2663}
2664
2665static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
2666{
2667	struct si_context *sctx = (struct si_context *)ctx;
2668	struct si_state_sample_mask *state = CALLOC_STRUCT(si_state_sample_mask);
2669	struct si_pm4_state *pm4 = &state->pm4;
2670	uint16_t mask = sample_mask;
2671
2672        if (state == NULL)
2673                return;
2674
2675	state->sample_mask = mask;
2676	si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16));
2677	si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16));
2678
2679	si_pm4_set_state(sctx, sample_mask, state);
2680}
2681
/*
 * Destroy a sampler state created by si_create_sampler_state().
 *
 * The object is allocated with CALLOC_STRUCT, so it is released with the
 * matching FREE macro, like the other destructors in this file.
 */
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
{
	FREE(state);
}
2686
2687/*
2688 * Vertex elements & buffers
2689 */
2690
2691static void *si_create_vertex_elements(struct pipe_context *ctx,
2692				       unsigned count,
2693				       const struct pipe_vertex_element *elements)
2694{
2695	struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
2696	int i;
2697
2698	assert(count < PIPE_MAX_ATTRIBS);
2699	if (!v)
2700		return NULL;
2701
2702	v->count = count;
2703	for (i = 0; i < count; ++i) {
2704		const struct util_format_description *desc;
2705		unsigned data_format, num_format;
2706		int first_non_void;
2707
2708		desc = util_format_description(elements[i].src_format);
2709		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
2710		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
2711		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
2712
2713		v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
2714				   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
2715				   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
2716				   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
2717				   S_008F0C_NUM_FORMAT(num_format) |
2718				   S_008F0C_DATA_FORMAT(data_format);
2719		v->format_size[i] = desc->block.bits / 8;
2720	}
2721	memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
2722
2723	return v;
2724}
2725
2726static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
2727{
2728	struct si_context *sctx = (struct si_context *)ctx;
2729	struct si_vertex_element *v = (struct si_vertex_element*)state;
2730
2731	sctx->vertex_elements = v;
2732	sctx->vertex_buffers_dirty = true;
2733}
2734
2735static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
2736{
2737	struct si_context *sctx = (struct si_context *)ctx;
2738
2739	if (sctx->vertex_elements == state)
2740		sctx->vertex_elements = NULL;
2741	FREE(state);
2742}
2743
2744static void si_set_vertex_buffers(struct pipe_context *ctx,
2745				  unsigned start_slot, unsigned count,
2746				  const struct pipe_vertex_buffer *buffers)
2747{
2748	struct si_context *sctx = (struct si_context *)ctx;
2749	struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
2750	int i;
2751
2752	assert(start_slot + count <= Elements(sctx->vertex_buffer));
2753
2754	if (buffers) {
2755		for (i = 0; i < count; i++) {
2756			const struct pipe_vertex_buffer *src = buffers + i;
2757			struct pipe_vertex_buffer *dsti = dst + i;
2758
2759			pipe_resource_reference(&dsti->buffer, src->buffer);
2760			dsti->buffer_offset = src->buffer_offset;
2761			dsti->stride = src->stride;
2762		}
2763	} else {
2764		for (i = 0; i < count; i++) {
2765			pipe_resource_reference(&dst[i].buffer, NULL);
2766		}
2767	}
2768	sctx->vertex_buffers_dirty = true;
2769}
2770
2771static void si_set_index_buffer(struct pipe_context *ctx,
2772				const struct pipe_index_buffer *ib)
2773{
2774	struct si_context *sctx = (struct si_context *)ctx;
2775
2776	if (ib) {
2777		pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer);
2778	        memcpy(&sctx->index_buffer, ib, sizeof(*ib));
2779	} else {
2780		pipe_resource_reference(&sctx->index_buffer.buffer, NULL);
2781	}
2782}
2783
2784/*
2785 * Misc
2786 */
2787static void si_set_polygon_stipple(struct pipe_context *ctx,
2788				   const struct pipe_poly_stipple *state)
2789{
2790	struct si_context *sctx = (struct si_context *)ctx;
2791	struct pipe_resource *tex;
2792	struct pipe_sampler_view *view;
2793	bool is_zero = true;
2794	bool is_one = true;
2795	int i;
2796
2797	/* The hardware obeys 0 and 1 swizzles in the descriptor even if
2798	 * the resource is NULL/invalid. Take advantage of this fact and skip
2799	 * texture allocation if the stipple pattern is constant.
2800	 *
2801	 * This is an optimization for the common case when stippling isn't
2802	 * used but set_polygon_stipple is still called by st/mesa.
2803	 */
2804	for (i = 0; i < Elements(state->stipple); i++) {
2805		is_zero = is_zero && state->stipple[i] == 0;
2806		is_one = is_one && state->stipple[i] == 0xffffffff;
2807	}
2808
2809	if (is_zero || is_one) {
2810		struct pipe_sampler_view templ = {{0}};
2811
2812		templ.swizzle_r = PIPE_SWIZZLE_ZERO;
2813		templ.swizzle_g = PIPE_SWIZZLE_ZERO;
2814		templ.swizzle_b = PIPE_SWIZZLE_ZERO;
2815		/* The pattern should be inverted in the texture. */
2816		templ.swizzle_a = is_zero ? PIPE_SWIZZLE_ONE : PIPE_SWIZZLE_ZERO;
2817
2818		view = ctx->create_sampler_view(ctx, NULL, &templ);
2819	} else {
2820		/* Create a new texture. */
2821		tex = util_pstipple_create_stipple_texture(ctx, state->stipple);
2822		if (!tex)
2823			return;
2824
2825		view = util_pstipple_create_sampler_view(ctx, tex);
2826		pipe_resource_reference(&tex, NULL);
2827	}
2828
2829	ctx->set_sampler_views(ctx, PIPE_SHADER_FRAGMENT,
2830			       SI_POLY_STIPPLE_SAMPLER, 1, &view);
2831	pipe_sampler_view_reference(&view, NULL);
2832
2833	/* Bind the sampler state if needed. */
2834	if (!sctx->pstipple_sampler_state) {
2835		sctx->pstipple_sampler_state = util_pstipple_create_sampler(ctx);
2836		ctx->bind_sampler_states(ctx, PIPE_SHADER_FRAGMENT,
2837					 SI_POLY_STIPPLE_SAMPLER, 1,
2838					 &sctx->pstipple_sampler_state);
2839	}
2840}
2841
2842static void si_texture_barrier(struct pipe_context *ctx)
2843{
2844	struct si_context *sctx = (struct si_context *)ctx;
2845
2846	sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
2847			 SI_CONTEXT_INV_TC_L2 |
2848			 SI_CONTEXT_FLUSH_AND_INV_CB;
2849}
2850
2851static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
2852{
2853	struct pipe_blend_state blend;
2854
2855	memset(&blend, 0, sizeof(blend));
2856	blend.independent_blend_enable = true;
2857	blend.rt[0].colormask = 0xf;
2858	return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
2859}
2860
/**
 * Adapter taking a pipe_context pointer that forwards to
 * si_need_cs_space().
 */
static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
				 bool include_draw_vbo)
{
	struct si_context *si = (struct si_context *)ctx;

	si_need_cs_space(si, num_dw, include_draw_vbo);
}
2866
2867void si_init_state_functions(struct si_context *sctx)
2868{
2869	si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
2870	si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
2871	si_init_atom(&sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
2872
2873	sctx->b.b.create_blend_state = si_create_blend_state;
2874	sctx->b.b.bind_blend_state = si_bind_blend_state;
2875	sctx->b.b.delete_blend_state = si_delete_blend_state;
2876	sctx->b.b.set_blend_color = si_set_blend_color;
2877
2878	sctx->b.b.create_rasterizer_state = si_create_rs_state;
2879	sctx->b.b.bind_rasterizer_state = si_bind_rs_state;
2880	sctx->b.b.delete_rasterizer_state = si_delete_rs_state;
2881
2882	sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state;
2883	sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
2884	sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state;
2885
2886	sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
2887	sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
2888	sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
2889	sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
2890
2891	sctx->b.b.set_clip_state = si_set_clip_state;
2892	sctx->b.b.set_scissor_states = si_set_scissor_states;
2893	sctx->b.b.set_viewport_states = si_set_viewport_states;
2894	sctx->b.b.set_stencil_ref = si_set_pipe_stencil_ref;
2895
2896	sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
2897	sctx->b.b.get_sample_position = cayman_get_sample_position;
2898
2899	sctx->b.b.create_sampler_state = si_create_sampler_state;
2900	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
2901	sctx->b.b.delete_sampler_state = si_delete_sampler_state;
2902
2903	sctx->b.b.create_sampler_view = si_create_sampler_view;
2904	sctx->b.b.sampler_view_destroy = si_sampler_view_destroy;
2905
2906	sctx->b.b.set_sample_mask = si_set_sample_mask;
2907
2908	sctx->b.b.create_vertex_elements_state = si_create_vertex_elements;
2909	sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements;
2910	sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element;
2911	sctx->b.b.set_vertex_buffers = si_set_vertex_buffers;
2912	sctx->b.b.set_index_buffer = si_set_index_buffer;
2913
2914	sctx->b.b.texture_barrier = si_texture_barrier;
2915	sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
2916	sctx->b.b.set_min_samples = si_set_min_samples;
2917
2918	sctx->b.dma_copy = si_dma_copy;
2919	sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
2920	sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;
2921
2922	sctx->b.b.draw_vbo = si_draw_vbo;
2923}
2924
2925static void
2926si_write_harvested_raster_configs(struct si_context *sctx,
2927				  struct si_pm4_state *pm4,
2928				  unsigned raster_config)
2929{
2930	unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
2931	unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
2932	unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
2933	unsigned num_rb = sctx->screen->b.info.r600_num_backends;
2934	unsigned rb_per_pkr = num_rb / num_se / sh_per_se;
2935	unsigned rb_per_se = num_rb / num_se;
2936	unsigned se0_mask = (1 << rb_per_se) - 1;
2937	unsigned se1_mask = se0_mask << rb_per_se;
2938	unsigned se;
2939
2940	assert(num_se == 1 || num_se == 2);
2941	assert(sh_per_se == 1 || sh_per_se == 2);
2942	assert(rb_per_pkr == 1 || rb_per_pkr == 2);
2943
2944	/* XXX: I can't figure out what the *_XSEL and *_YSEL
2945	 * fields are for, so I'm leaving them as their default
2946	 * values. */
2947
2948	se0_mask &= rb_mask;
2949	se1_mask &= rb_mask;
2950	if (num_se == 2 && (!se0_mask || !se1_mask)) {
2951		raster_config &= C_028350_SE_MAP;
2952
2953		if (!se0_mask) {
2954			raster_config |=
2955				S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
2956		} else {
2957			raster_config |=
2958				S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
2959		}
2960	}
2961
2962	for (se = 0; se < num_se; se++) {
2963		unsigned raster_config_se = raster_config;
2964		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
2965		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
2966
2967		pkr0_mask &= rb_mask;
2968		pkr1_mask &= rb_mask;
2969		if (sh_per_se == 2 && (!pkr0_mask || !pkr1_mask)) {
2970			raster_config_se &= C_028350_PKR_MAP;
2971
2972			if (!pkr0_mask) {
2973				raster_config_se |=
2974					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
2975			} else {
2976				raster_config_se |=
2977					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
2978			}
2979		}
2980
2981		if (rb_per_pkr == 2) {
2982			unsigned rb0_mask = 1 << (se * rb_per_se);
2983			unsigned rb1_mask = rb0_mask << 1;
2984
2985			rb0_mask &= rb_mask;
2986			rb1_mask &= rb_mask;
2987			if (!rb0_mask || !rb1_mask) {
2988				raster_config_se &= C_028350_RB_MAP_PKR0;
2989
2990				if (!rb0_mask) {
2991					raster_config_se |=
2992						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
2993				} else {
2994					raster_config_se |=
2995						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
2996				}
2997			}
2998
2999			if (sh_per_se == 2) {
3000				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3001				rb1_mask = rb0_mask << 1;
3002				rb0_mask &= rb_mask;
3003				rb1_mask &= rb_mask;
3004				if (!rb0_mask || !rb1_mask) {
3005					raster_config_se &= C_028350_RB_MAP_PKR1;
3006
3007					if (!rb0_mask) {
3008						raster_config_se |=
3009							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
3010					} else {
3011						raster_config_se |=
3012							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
3013					}
3014				}
3015			}
3016		}
3017
3018		si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
3019			       SE_INDEX(se) | SH_BROADCAST_WRITES |
3020			       INSTANCE_BROADCAST_WRITES);
3021		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
3022	}
3023
3024	si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
3025		       SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
3026		       INSTANCE_BROADCAST_WRITES);
3027}
3028
3029void si_init_config(struct si_context *sctx)
3030{
3031	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
3032
3033	if (pm4 == NULL)
3034		return;
3035
3036	si_cmd_context_control(pm4);
3037
3038	si_pm4_set_reg(pm4, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0);
3039	si_pm4_set_reg(pm4, R_028A14_VGT_HOS_CNTL, 0x0);
3040	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0);
3041	si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0);
3042	si_pm4_set_reg(pm4, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0);
3043	si_pm4_set_reg(pm4, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0);
3044	si_pm4_set_reg(pm4, R_028A28_VGT_GROUP_FIRST_DECR, 0x0);
3045	si_pm4_set_reg(pm4, R_028A2C_VGT_GROUP_DECR, 0x0);
3046	si_pm4_set_reg(pm4, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0);
3047	si_pm4_set_reg(pm4, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0);
3048	si_pm4_set_reg(pm4, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0);
3049	si_pm4_set_reg(pm4, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0);
3050
3051	/* FIXME calculate these values somehow ??? */
3052	si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80);
3053	si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
3054	si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
3055
3056	si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0x0);
3057	si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
3058	si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0);
3059	si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
3060
3061	si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 0);
3062	si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 0);
3063	si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 0);
3064	si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0);
3065
3066	si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
3067	si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0);
3068	si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
3069	if (sctx->b.chip_class < CIK)
3070		si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
3071			       S_008A14_CLIP_VTX_REORDER_ENA(1));
3072
3073	si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
3074	si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
3075
3076	si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
3077
3078	if (sctx->b.chip_class >= CIK) {
3079		switch (sctx->screen->b.family) {
3080		case CHIP_BONAIRE:
3081			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012);
3082			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0);
3083			break;
3084		case CHIP_HAWAII:
3085			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a);
3086			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002e);
3087			break;
3088		case CHIP_KAVERI:
3089			/* XXX todo */
3090		case CHIP_KABINI:
3091			/* XXX todo */
3092		case CHIP_MULLINS:
3093			/* XXX todo */
3094		default:
3095			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0);
3096			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0);
3097			break;
3098		}
3099	} else {
3100		unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
3101		unsigned num_rb = sctx->screen->b.info.r600_num_backends;
3102		unsigned raster_config;
3103
3104		switch (sctx->screen->b.family) {
3105		case CHIP_TAHITI:
3106		case CHIP_PITCAIRN:
3107			raster_config = 0x2a00126a;
3108			break;
3109		case CHIP_VERDE:
3110			raster_config = 0x0000124a;
3111			break;
3112		case CHIP_OLAND:
3113			raster_config = 0x00000082;
3114			break;
3115		case CHIP_HAINAN:
3116			raster_config = 0;
3117			break;
3118		default:
3119			fprintf(stderr,
3120				"radeonsi: Unknown GPU, using 0 for raster_config\n");
3121			raster_config = 0;
3122			break;
3123		}
3124
3125		/* Always use the default config when all backends are enabled
3126		 * (or when we failed to determine the enabled backends).
3127		 */
3128		if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
3129			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
3130				       raster_config);
3131		} else {
3132			si_write_harvested_raster_configs(sctx, pm4, raster_config);
3133		}
3134	}
3135
3136	si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
3137	si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
3138	si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
3139		       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
3140	si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
3141	si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
3142		       S_028034_BR_X(16384) | S_028034_BR_Y(16384));
3143
3144	si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
3145	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
3146	/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
3147	si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
3148	si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0);
3149	si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, fui(1.0));
3150	si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
3151	si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0));
3152	si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
3153	si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0));
3154	si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0));
3155	si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0);
3156	si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0);
3157	si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0);
3158	si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
3159	si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
3160	si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
3161
3162	/* There is a hang if stencil is used and fast stencil is enabled
3163	 * regardless of whether HTILE is depth-only or not.
3164	 */
3165	si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE,
3166		       S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
3167		       S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) |
3168		       S_02800C_FAST_STENCIL_DISABLE(1));
3169
3170	si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
3171	si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
3172	si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
3173
3174	if (sctx->b.chip_class >= CIK) {
3175		si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
3176		si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0));
3177		si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
3178	}
3179
3180	sctx->init_config = pm4;
3181}
3182