si_state.c revision 21d9a1b5ef51ce449e9a82641d0d605c5448b41c
1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *      Christian König <christian.koenig@amd.com>
25 */
26
27#include "util/u_memory.h"
28#include "util/u_framebuffer.h"
29#include "util/u_blitter.h"
30#include "util/u_helpers.h"
31#include "util/u_math.h"
32#include "util/u_pack_color.h"
33#include "util/u_upload_mgr.h"
34#include "util/u_format_s3tc.h"
35#include "tgsi/tgsi_parse.h"
36#include "radeonsi_pipe.h"
37#include "radeonsi_shader.h"
38#include "si_state.h"
39#include "sid.h"
40
41static uint32_t cik_num_banks(uint32_t nbanks)
42{
43	switch (nbanks) {
44	case 2:
45		return V_02803C_ADDR_SURF_2_BANK;
46	case 4:
47		return V_02803C_ADDR_SURF_4_BANK;
48	case 8:
49	default:
50		return V_02803C_ADDR_SURF_8_BANK;
51	case 16:
52		return V_02803C_ADDR_SURF_16_BANK;
53	}
54}
55
56
57static unsigned cik_tile_split(unsigned tile_split)
58{
59	switch (tile_split) {
60	case 64:
61		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_64B;
62		break;
63	case 128:
64		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_128B;
65		break;
66	case 256:
67		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_256B;
68		break;
69	case 512:
70		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_512B;
71		break;
72	default:
73	case 1024:
74		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_1KB;
75		break;
76	case 2048:
77		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_2KB;
78		break;
79	case 4096:
80		tile_split = V_028040_ADDR_SURF_TILE_SPLIT_4KB;
81		break;
82	}
83	return tile_split;
84}
85
86static unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect)
87{
88	switch (macro_tile_aspect) {
89	default:
90	case 1:
91		macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_1;
92		break;
93	case 2:
94		macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_2;
95		break;
96	case 4:
97		macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_4;
98		break;
99	case 8:
100		macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_8;
101		break;
102	}
103	return macro_tile_aspect;
104}
105
106static unsigned cik_bank_wh(unsigned bankwh)
107{
108	switch (bankwh) {
109	default:
110	case 1:
111		bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_1;
112		break;
113	case 2:
114		bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_2;
115		break;
116	case 4:
117		bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_4;
118		break;
119	case 8:
120		bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_8;
121		break;
122	}
123	return bankwh;
124}
125
126static unsigned cik_db_pipe_config(unsigned tile_pipes,
127				   unsigned num_rbs)
128{
129	unsigned pipe_config;
130
131	switch (tile_pipes) {
132	case 8:
133		pipe_config = V_02803C_X_ADDR_SURF_P8_32X32_16X16;
134		break;
135	case 4:
136	default:
137		if (num_rbs == 4)
138			pipe_config = V_02803C_X_ADDR_SURF_P4_16X16;
139		else
140			pipe_config = V_02803C_X_ADDR_SURF_P4_8X16;
141		break;
142	case 2:
143			pipe_config = V_02803C_ADDR_SURF_P2;
144		break;
145	}
146	return pipe_config;
147}
148
149/*
150 * inferred framebuffer and blender state
151 */
152static void si_update_fb_blend_state(struct r600_context *rctx)
153{
154	struct si_pm4_state *pm4;
155	struct si_state_blend *blend = rctx->queued.named.blend;
156	uint32_t mask;
157
158	if (blend == NULL)
159		return;
160
161	pm4 = si_pm4_alloc_state(rctx);
162	if (pm4 == NULL)
163		return;
164
165	mask = (1ULL << ((unsigned)rctx->framebuffer.nr_cbufs * 4)) - 1;
166	mask &= blend->cb_target_mask;
167	si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
168
169	si_pm4_set_state(rctx, fb_blend, pm4);
170}
171
172/*
173 * Blender functions
174 */
175
176static uint32_t si_translate_blend_function(int blend_func)
177{
178	switch (blend_func) {
179	case PIPE_BLEND_ADD:
180		return V_028780_COMB_DST_PLUS_SRC;
181	case PIPE_BLEND_SUBTRACT:
182		return V_028780_COMB_SRC_MINUS_DST;
183	case PIPE_BLEND_REVERSE_SUBTRACT:
184		return V_028780_COMB_DST_MINUS_SRC;
185	case PIPE_BLEND_MIN:
186		return V_028780_COMB_MIN_DST_SRC;
187	case PIPE_BLEND_MAX:
188		return V_028780_COMB_MAX_DST_SRC;
189	default:
190		R600_ERR("Unknown blend function %d\n", blend_func);
191		assert(0);
192		break;
193	}
194	return 0;
195}
196
197static uint32_t si_translate_blend_factor(int blend_fact)
198{
199	switch (blend_fact) {
200	case PIPE_BLENDFACTOR_ONE:
201		return V_028780_BLEND_ONE;
202	case PIPE_BLENDFACTOR_SRC_COLOR:
203		return V_028780_BLEND_SRC_COLOR;
204	case PIPE_BLENDFACTOR_SRC_ALPHA:
205		return V_028780_BLEND_SRC_ALPHA;
206	case PIPE_BLENDFACTOR_DST_ALPHA:
207		return V_028780_BLEND_DST_ALPHA;
208	case PIPE_BLENDFACTOR_DST_COLOR:
209		return V_028780_BLEND_DST_COLOR;
210	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
211		return V_028780_BLEND_SRC_ALPHA_SATURATE;
212	case PIPE_BLENDFACTOR_CONST_COLOR:
213		return V_028780_BLEND_CONSTANT_COLOR;
214	case PIPE_BLENDFACTOR_CONST_ALPHA:
215		return V_028780_BLEND_CONSTANT_ALPHA;
216	case PIPE_BLENDFACTOR_ZERO:
217		return V_028780_BLEND_ZERO;
218	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
219		return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
220	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
221		return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
222	case PIPE_BLENDFACTOR_INV_DST_ALPHA:
223		return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
224	case PIPE_BLENDFACTOR_INV_DST_COLOR:
225		return V_028780_BLEND_ONE_MINUS_DST_COLOR;
226	case PIPE_BLENDFACTOR_INV_CONST_COLOR:
227		return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
228	case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
229		return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
230	case PIPE_BLENDFACTOR_SRC1_COLOR:
231		return V_028780_BLEND_SRC1_COLOR;
232	case PIPE_BLENDFACTOR_SRC1_ALPHA:
233		return V_028780_BLEND_SRC1_ALPHA;
234	case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
235		return V_028780_BLEND_INV_SRC1_COLOR;
236	case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
237		return V_028780_BLEND_INV_SRC1_ALPHA;
238	default:
239		R600_ERR("Bad blend factor %d not supported!\n", blend_fact);
240		assert(0);
241		break;
242	}
243	return 0;
244}
245
246static void *si_create_blend_state_mode(struct pipe_context *ctx,
247					const struct pipe_blend_state *state,
248					unsigned mode)
249{
250	struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
251	struct si_pm4_state *pm4 = &blend->pm4;
252
253	uint32_t color_control;
254
255	if (blend == NULL)
256		return NULL;
257
258	blend->alpha_to_one = state->alpha_to_one;
259
260	color_control = S_028808_MODE(mode);
261	if (state->logicop_enable) {
262		color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
263	} else {
264		color_control |= S_028808_ROP3(0xcc);
265	}
266	si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
267
268	si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
269		       S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
270		       S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
271		       S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
272		       S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
273		       S_028B70_ALPHA_TO_MASK_OFFSET3(2));
274
275	blend->cb_target_mask = 0;
276	for (int i = 0; i < 8; i++) {
277		/* state->rt entries > 0 only written if independent blending */
278		const int j = state->independent_blend_enable ? i : 0;
279
280		unsigned eqRGB = state->rt[j].rgb_func;
281		unsigned srcRGB = state->rt[j].rgb_src_factor;
282		unsigned dstRGB = state->rt[j].rgb_dst_factor;
283		unsigned eqA = state->rt[j].alpha_func;
284		unsigned srcA = state->rt[j].alpha_src_factor;
285		unsigned dstA = state->rt[j].alpha_dst_factor;
286
287		unsigned blend_cntl = 0;
288
289		/* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */
290		blend->cb_target_mask |= state->rt[j].colormask << (4 * i);
291
292		if (!state->rt[j].blend_enable) {
293			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
294			continue;
295		}
296
297		blend_cntl |= S_028780_ENABLE(1);
298		blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
299		blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
300		blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
301
302		if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
303			blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
304			blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
305			blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
306			blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
307		}
308		si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
309	}
310
311	return blend;
312}
313
314static void *si_create_blend_state(struct pipe_context *ctx,
315				   const struct pipe_blend_state *state)
316{
317	return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
318}
319
320static void si_bind_blend_state(struct pipe_context *ctx, void *state)
321{
322	struct r600_context *rctx = (struct r600_context *)ctx;
323	si_pm4_bind_state(rctx, blend, (struct si_state_blend *)state);
324	si_update_fb_blend_state(rctx);
325}
326
327static void si_delete_blend_state(struct pipe_context *ctx, void *state)
328{
329	struct r600_context *rctx = (struct r600_context *)ctx;
330	si_pm4_delete_state(rctx, blend, (struct si_state_blend *)state);
331}
332
333static void si_set_blend_color(struct pipe_context *ctx,
334			       const struct pipe_blend_color *state)
335{
336	struct r600_context *rctx = (struct r600_context *)ctx;
337	struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
338
339        if (pm4 == NULL)
340                return;
341
342	si_pm4_set_reg(pm4, R_028414_CB_BLEND_RED, fui(state->color[0]));
343	si_pm4_set_reg(pm4, R_028418_CB_BLEND_GREEN, fui(state->color[1]));
344	si_pm4_set_reg(pm4, R_02841C_CB_BLEND_BLUE, fui(state->color[2]));
345	si_pm4_set_reg(pm4, R_028420_CB_BLEND_ALPHA, fui(state->color[3]));
346
347	si_pm4_set_state(rctx, blend_color, pm4);
348}
349
350/*
351 * Clipping, scissors and viewport
352 */
353
354static void si_set_clip_state(struct pipe_context *ctx,
355			      const struct pipe_clip_state *state)
356{
357	struct r600_context *rctx = (struct r600_context *)ctx;
358	struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
359	struct pipe_constant_buffer cb;
360
361	if (pm4 == NULL)
362		return;
363
364	for (int i = 0; i < 6; i++) {
365		si_pm4_set_reg(pm4, R_0285BC_PA_CL_UCP_0_X + i * 16,
366			       fui(state->ucp[i][0]));
367		si_pm4_set_reg(pm4, R_0285C0_PA_CL_UCP_0_Y + i * 16,
368			       fui(state->ucp[i][1]));
369		si_pm4_set_reg(pm4, R_0285C4_PA_CL_UCP_0_Z + i * 16,
370			       fui(state->ucp[i][2]));
371		si_pm4_set_reg(pm4, R_0285C8_PA_CL_UCP_0_W + i * 16,
372			       fui(state->ucp[i][3]));
373        }
374
375	cb.buffer = NULL;
376	cb.user_buffer = state->ucp;
377	cb.buffer_offset = 0;
378	cb.buffer_size = 4*4*8;
379	ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, 1, &cb);
380	pipe_resource_reference(&cb.buffer, NULL);
381
382	si_pm4_set_state(rctx, clip, pm4);
383}
384
385static void si_set_scissor_states(struct pipe_context *ctx,
386                                  unsigned start_slot,
387                                  unsigned num_scissors,
388                                  const struct pipe_scissor_state *state)
389{
390	struct r600_context *rctx = (struct r600_context *)ctx;
391	struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
392	uint32_t tl, br;
393
394	if (pm4 == NULL)
395		return;
396
397	tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny);
398	br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
399	si_pm4_set_reg(pm4, R_028210_PA_SC_CLIPRECT_0_TL, tl);
400	si_pm4_set_reg(pm4, R_028214_PA_SC_CLIPRECT_0_BR, br);
401	si_pm4_set_reg(pm4, R_028218_PA_SC_CLIPRECT_1_TL, tl);
402	si_pm4_set_reg(pm4, R_02821C_PA_SC_CLIPRECT_1_BR, br);
403	si_pm4_set_reg(pm4, R_028220_PA_SC_CLIPRECT_2_TL, tl);
404	si_pm4_set_reg(pm4, R_028224_PA_SC_CLIPRECT_2_BR, br);
405	si_pm4_set_reg(pm4, R_028228_PA_SC_CLIPRECT_3_TL, tl);
406	si_pm4_set_reg(pm4, R_02822C_PA_SC_CLIPRECT_3_BR, br);
407
408	si_pm4_set_state(rctx, scissor, pm4);
409}
410
411static void si_set_viewport_states(struct pipe_context *ctx,
412                                   unsigned start_slot,
413                                   unsigned num_viewports,
414                                   const struct pipe_viewport_state *state)
415{
416	struct r600_context *rctx = (struct r600_context *)ctx;
417	struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport);
418	struct si_pm4_state *pm4 = &viewport->pm4;
419
420	if (viewport == NULL)
421		return;
422
423	viewport->viewport = *state;
424	si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000);
425	si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000);
426	si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]));
427	si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]));
428	si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]));
429	si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]));
430	si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]));
431	si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]));
432	si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
433
434	si_pm4_set_state(rctx, viewport, viewport);
435}
436
437/*
438 * inferred state between framebuffer and rasterizer
439 */
440static void si_update_fb_rs_state(struct r600_context *rctx)
441{
442	struct si_state_rasterizer *rs = rctx->queued.named.rasterizer;
443	struct si_pm4_state *pm4;
444	unsigned offset_db_fmt_cntl = 0, depth;
445	float offset_units;
446
447	if (!rs || !rctx->framebuffer.zsbuf)
448		return;
449
450	offset_units = rctx->queued.named.rasterizer->offset_units;
451	switch (rctx->framebuffer.zsbuf->texture->format) {
452	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
453	case PIPE_FORMAT_X8Z24_UNORM:
454	case PIPE_FORMAT_Z24X8_UNORM:
455	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
456		depth = -24;
457		offset_units *= 2.0f;
458		break;
459	case PIPE_FORMAT_Z32_FLOAT:
460	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
461		depth = -23;
462		offset_units *= 1.0f;
463		offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
464		break;
465	case PIPE_FORMAT_Z16_UNORM:
466		depth = -16;
467		offset_units *= 4.0f;
468		break;
469	default:
470		return;
471	}
472
473	pm4 = si_pm4_alloc_state(rctx);
474
475	if (pm4 == NULL)
476		return;
477
478	/* FIXME some of those reg can be computed with cso */
479	offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
480	si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
481		       fui(rctx->queued.named.rasterizer->offset_scale));
482	si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units));
483	si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
484		       fui(rctx->queued.named.rasterizer->offset_scale));
485	si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units));
486	si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, offset_db_fmt_cntl);
487
488	si_pm4_set_state(rctx, fb_rs, pm4);
489}
490
491/*
492 * Rasterizer
493 */
494
495static uint32_t si_translate_fill(uint32_t func)
496{
497	switch(func) {
498	case PIPE_POLYGON_MODE_FILL:
499		return V_028814_X_DRAW_TRIANGLES;
500	case PIPE_POLYGON_MODE_LINE:
501		return V_028814_X_DRAW_LINES;
502	case PIPE_POLYGON_MODE_POINT:
503		return V_028814_X_DRAW_POINTS;
504	default:
505		assert(0);
506		return V_028814_X_DRAW_POINTS;
507	}
508}
509
510static void *si_create_rs_state(struct pipe_context *ctx,
511				const struct pipe_rasterizer_state *state)
512{
513	struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
514	struct si_pm4_state *pm4 = &rs->pm4;
515	unsigned tmp;
516	unsigned prov_vtx = 1, polygon_dual_mode;
517	unsigned clip_rule;
518	float psize_min, psize_max;
519
520	if (rs == NULL) {
521		return NULL;
522	}
523
524	rs->two_side = state->light_twoside;
525	rs->multisample_enable = state->multisample;
526	rs->clip_plane_enable = state->clip_plane_enable;
527
528	polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL ||
529				state->fill_back != PIPE_POLYGON_MODE_FILL);
530
531	if (state->flatshade_first)
532		prov_vtx = 0;
533
534	rs->flatshade = state->flatshade;
535	rs->sprite_coord_enable = state->sprite_coord_enable;
536	rs->pa_sc_line_stipple = state->line_stipple_enable ?
537				S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
538				S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
539	rs->pa_su_sc_mode_cntl =
540		S_028814_PROVOKING_VTX_LAST(prov_vtx) |
541		S_028814_CULL_FRONT(state->rasterizer_discard || (state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
542		S_028814_CULL_BACK(state->rasterizer_discard || (state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
543		S_028814_FACE(!state->front_ccw) |
544		S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
545		S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
546		S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
547		S_028814_POLY_MODE(polygon_dual_mode) |
548		S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
549		S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back));
550	rs->pa_cl_clip_cntl =
551		S_028810_PS_UCP_MODE(3) |
552		S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
553		S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
554		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
555
556	clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
557
558	/* offset */
559	rs->offset_units = state->offset_units;
560	rs->offset_scale = state->offset_scale * 12.0f;
561
562	tmp = S_0286D4_FLAT_SHADE_ENA(1);
563	if (state->sprite_coord_enable) {
564		tmp |= S_0286D4_PNT_SPRITE_ENA(1) |
565			S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
566			S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
567			S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
568			S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1);
569		if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
570			tmp |= S_0286D4_PNT_SPRITE_TOP_1(1);
571		}
572	}
573	si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, tmp);
574
575	si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0x00000000);
576	/* point size 12.4 fixed point */
577	tmp = (unsigned)(state->point_size * 8.0);
578	si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
579
580	if (state->point_size_per_vertex) {
581		psize_min = util_get_min_point_size(state);
582		psize_max = 8192;
583	} else {
584		/* Force the point size to be as if the vertex output was disabled. */
585		psize_min = state->point_size;
586		psize_max = state->point_size;
587	}
588	/* Divide by two, because 0.5 = 1 pixel. */
589	si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
590			S_028A04_MIN_SIZE(r600_pack_float_12p4(psize_min/2)) |
591			S_028A04_MAX_SIZE(r600_pack_float_12p4(psize_max/2)));
592
593	tmp = (unsigned)state->line_width * 8;
594	si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp));
595	si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
596		       S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
597		       S_028A48_MSAA_ENABLE(state->multisample));
598
599	si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
600		       S_028BE4_PIX_CENTER(state->half_pixel_center) |
601		       S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
602	si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000);
603	si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000);
604	si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000);
605	si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000);
606
607	si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
608	si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule);
609
610	return rs;
611}
612
613static void si_bind_rs_state(struct pipe_context *ctx, void *state)
614{
615	struct r600_context *rctx = (struct r600_context *)ctx;
616	struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
617
618	if (state == NULL)
619		return;
620
621	// TODO
622	rctx->sprite_coord_enable = rs->sprite_coord_enable;
623	rctx->pa_sc_line_stipple = rs->pa_sc_line_stipple;
624	rctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl;
625
626	si_pm4_bind_state(rctx, rasterizer, rs);
627	si_update_fb_rs_state(rctx);
628}
629
630static void si_delete_rs_state(struct pipe_context *ctx, void *state)
631{
632	struct r600_context *rctx = (struct r600_context *)ctx;
633	si_pm4_delete_state(rctx, rasterizer, (struct si_state_rasterizer *)state);
634}
635
636/*
637 * infeered state between dsa and stencil ref
638 */
639static void si_update_dsa_stencil_ref(struct r600_context *rctx)
640{
641	struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
642	struct pipe_stencil_ref *ref = &rctx->stencil_ref;
643        struct si_state_dsa *dsa = rctx->queued.named.dsa;
644
645        if (pm4 == NULL)
646                return;
647
648	si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK,
649		       S_028430_STENCILTESTVAL(ref->ref_value[0]) |
650		       S_028430_STENCILMASK(dsa->valuemask[0]) |
651		       S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
652		       S_028430_STENCILOPVAL(1));
653	si_pm4_set_reg(pm4, R_028434_DB_STENCILREFMASK_BF,
654		       S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
655		       S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
656		       S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
657		       S_028434_STENCILOPVAL_BF(1));
658
659	si_pm4_set_state(rctx, dsa_stencil_ref, pm4);
660}
661
662static void si_set_pipe_stencil_ref(struct pipe_context *ctx,
663				    const struct pipe_stencil_ref *state)
664{
665        struct r600_context *rctx = (struct r600_context *)ctx;
666        rctx->stencil_ref = *state;
667	si_update_dsa_stencil_ref(rctx);
668}
669
670
671/*
672 * DSA
673 */
674
675static uint32_t si_translate_stencil_op(int s_op)
676{
677	switch (s_op) {
678	case PIPE_STENCIL_OP_KEEP:
679		return V_02842C_STENCIL_KEEP;
680	case PIPE_STENCIL_OP_ZERO:
681		return V_02842C_STENCIL_ZERO;
682	case PIPE_STENCIL_OP_REPLACE:
683		return V_02842C_STENCIL_REPLACE_TEST;
684	case PIPE_STENCIL_OP_INCR:
685		return V_02842C_STENCIL_ADD_CLAMP;
686	case PIPE_STENCIL_OP_DECR:
687		return V_02842C_STENCIL_SUB_CLAMP;
688	case PIPE_STENCIL_OP_INCR_WRAP:
689		return V_02842C_STENCIL_ADD_WRAP;
690	case PIPE_STENCIL_OP_DECR_WRAP:
691		return V_02842C_STENCIL_SUB_WRAP;
692	case PIPE_STENCIL_OP_INVERT:
693		return V_02842C_STENCIL_INVERT;
694	default:
695		R600_ERR("Unknown stencil op %d", s_op);
696		assert(0);
697		break;
698	}
699	return 0;
700}
701
702static void *si_create_dsa_state(struct pipe_context *ctx,
703				 const struct pipe_depth_stencil_alpha_state *state)
704{
705	struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
706	struct si_pm4_state *pm4 = &dsa->pm4;
707	unsigned db_depth_control;
708	unsigned db_render_override, db_render_control;
709	uint32_t db_stencil_control = 0;
710
711	if (dsa == NULL) {
712		return NULL;
713	}
714
715	dsa->valuemask[0] = state->stencil[0].valuemask;
716	dsa->valuemask[1] = state->stencil[1].valuemask;
717	dsa->writemask[0] = state->stencil[0].writemask;
718	dsa->writemask[1] = state->stencil[1].writemask;
719
720	db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
721		S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
722		S_028800_ZFUNC(state->depth.func);
723
724	/* stencil */
725	if (state->stencil[0].enabled) {
726		db_depth_control |= S_028800_STENCIL_ENABLE(1);
727		db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
728		db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
729		db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
730		db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
731
732		if (state->stencil[1].enabled) {
733			db_depth_control |= S_028800_BACKFACE_ENABLE(1);
734			db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
735			db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
736			db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
737			db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
738		}
739	}
740
741	/* alpha */
742	if (state->alpha.enabled) {
743		dsa->alpha_func = state->alpha.func;
744		dsa->alpha_ref = state->alpha.ref_value;
745	} else {
746		dsa->alpha_func = PIPE_FUNC_ALWAYS;
747	}
748
749	/* misc */
750	db_render_control = 0;
751	db_render_override = S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) |
752		S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
753		S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
754	/* TODO db_render_override depends on query */
755	si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0x00000000);
756	si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0x00000000);
757	si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0x00000000);
758	si_pm4_set_reg(pm4, R_02802C_DB_DEPTH_CLEAR, 0x3F800000);
759	//si_pm4_set_reg(pm4, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control);
760	si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
761	si_pm4_set_reg(pm4, R_028000_DB_RENDER_CONTROL, db_render_control);
762	si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
763	si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
764	si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
765	si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
766	si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
767	dsa->db_render_override = db_render_override;
768
769	return dsa;
770}
771
/*
 * Queue a depth/stencil/alpha state and rebuild the stencil reference
 * registers that depend on its masks. A NULL state is ignored.
 */
static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct si_state_dsa *dsa = state;

	if (!dsa)
		return;

	si_pm4_bind_state(rctx, dsa, dsa);
	si_update_dsa_stencil_ref(rctx);
}
783
784static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
785{
786	struct r600_context *rctx = (struct r600_context *)ctx;
787	si_pm4_delete_state(rctx, dsa, (struct si_state_dsa *)state);
788}
789
790static void *si_create_db_flush_dsa(struct r600_context *rctx, bool copy_depth,
791				    bool copy_stencil, int sample)
792{
793	struct pipe_depth_stencil_alpha_state dsa;
794        struct si_state_dsa *state;
795
796	memset(&dsa, 0, sizeof(dsa));
797
798	state = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa);
799	if (copy_depth || copy_stencil) {
800		si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL,
801			       S_028000_DEPTH_COPY(copy_depth) |
802			       S_028000_STENCIL_COPY(copy_stencil) |
803			       S_028000_COPY_CENTROID(1) |
804			       S_028000_COPY_SAMPLE(sample));
805	} else {
806		si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL,
807			       S_028000_DEPTH_COMPRESS_DISABLE(1) |
808			       S_028000_STENCIL_COMPRESS_DISABLE(1));
809		si_pm4_set_reg(&state->pm4, R_02800C_DB_RENDER_OVERRIDE,
810			       S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) |
811			       S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
812			       S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) |
813			       S_02800C_DISABLE_TILE_RATE_TILES(1));
814	}
815
816        return state;
817}
818
819/*
820 * format translation
821 */
822static uint32_t si_translate_colorformat(enum pipe_format format)
823{
824	switch (format) {
825	/* 8-bit buffers. */
826	case PIPE_FORMAT_A8_UNORM:
827	case PIPE_FORMAT_A8_SNORM:
828	case PIPE_FORMAT_A8_UINT:
829	case PIPE_FORMAT_A8_SINT:
830	case PIPE_FORMAT_I8_UNORM:
831	case PIPE_FORMAT_I8_SNORM:
832	case PIPE_FORMAT_I8_UINT:
833	case PIPE_FORMAT_I8_SINT:
834	case PIPE_FORMAT_L8_UNORM:
835	case PIPE_FORMAT_L8_SNORM:
836	case PIPE_FORMAT_L8_UINT:
837	case PIPE_FORMAT_L8_SINT:
838	case PIPE_FORMAT_L8_SRGB:
839	case PIPE_FORMAT_R8_UNORM:
840	case PIPE_FORMAT_R8_SNORM:
841	case PIPE_FORMAT_R8_UINT:
842	case PIPE_FORMAT_R8_SINT:
843		return V_028C70_COLOR_8;
844
845	/* 16-bit buffers. */
846	case PIPE_FORMAT_B5G6R5_UNORM:
847		return V_028C70_COLOR_5_6_5;
848
849	case PIPE_FORMAT_B5G5R5A1_UNORM:
850	case PIPE_FORMAT_B5G5R5X1_UNORM:
851		return V_028C70_COLOR_1_5_5_5;
852
853	case PIPE_FORMAT_B4G4R4A4_UNORM:
854	case PIPE_FORMAT_B4G4R4X4_UNORM:
855		return V_028C70_COLOR_4_4_4_4;
856
857	case PIPE_FORMAT_L8A8_UNORM:
858	case PIPE_FORMAT_L8A8_SNORM:
859	case PIPE_FORMAT_L8A8_UINT:
860	case PIPE_FORMAT_L8A8_SINT:
861	case PIPE_FORMAT_R8G8_SNORM:
862	case PIPE_FORMAT_R8G8_UNORM:
863	case PIPE_FORMAT_R8G8_UINT:
864	case PIPE_FORMAT_R8G8_SINT:
865		return V_028C70_COLOR_8_8;
866
867	case PIPE_FORMAT_Z16_UNORM:
868	case PIPE_FORMAT_R16_UNORM:
869	case PIPE_FORMAT_R16_SNORM:
870	case PIPE_FORMAT_R16_UINT:
871	case PIPE_FORMAT_R16_SINT:
872	case PIPE_FORMAT_R16_FLOAT:
873	case PIPE_FORMAT_L16_UNORM:
874	case PIPE_FORMAT_L16_SNORM:
875	case PIPE_FORMAT_L16_FLOAT:
876	case PIPE_FORMAT_I16_UNORM:
877	case PIPE_FORMAT_I16_SNORM:
878	case PIPE_FORMAT_I16_FLOAT:
879	case PIPE_FORMAT_A16_UNORM:
880	case PIPE_FORMAT_A16_SNORM:
881	case PIPE_FORMAT_A16_FLOAT:
882		return V_028C70_COLOR_16;
883
884	/* 32-bit buffers. */
885	case PIPE_FORMAT_A8B8G8R8_SRGB:
886	case PIPE_FORMAT_A8B8G8R8_UNORM:
887	case PIPE_FORMAT_A8R8G8B8_UNORM:
888	case PIPE_FORMAT_B8G8R8A8_SRGB:
889	case PIPE_FORMAT_B8G8R8A8_UNORM:
890	case PIPE_FORMAT_B8G8R8X8_UNORM:
891	case PIPE_FORMAT_R8G8B8A8_SNORM:
892	case PIPE_FORMAT_R8G8B8A8_UNORM:
893	case PIPE_FORMAT_R8G8B8X8_UNORM:
894	case PIPE_FORMAT_R8G8B8X8_SNORM:
895	case PIPE_FORMAT_R8G8B8X8_SRGB:
896	case PIPE_FORMAT_R8G8B8X8_UINT:
897	case PIPE_FORMAT_R8G8B8X8_SINT:
898	case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
899	case PIPE_FORMAT_X8B8G8R8_UNORM:
900	case PIPE_FORMAT_X8R8G8B8_UNORM:
901	case PIPE_FORMAT_R8G8B8A8_SSCALED:
902	case PIPE_FORMAT_R8G8B8A8_USCALED:
903	case PIPE_FORMAT_R8G8B8A8_SINT:
904	case PIPE_FORMAT_R8G8B8A8_UINT:
905		return V_028C70_COLOR_8_8_8_8;
906
907	case PIPE_FORMAT_R10G10B10A2_UNORM:
908	case PIPE_FORMAT_R10G10B10X2_SNORM:
909	case PIPE_FORMAT_B10G10R10A2_UNORM:
910	case PIPE_FORMAT_B10G10R10A2_UINT:
911	case PIPE_FORMAT_B10G10R10X2_UNORM:
912	case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
913		return V_028C70_COLOR_2_10_10_10;
914
915	case PIPE_FORMAT_Z24X8_UNORM:
916	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
917		return V_028C70_COLOR_8_24;
918
919	case PIPE_FORMAT_S8X24_UINT:
920	case PIPE_FORMAT_X8Z24_UNORM:
921	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
922		return V_028C70_COLOR_24_8;
923
924	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
925		return V_028C70_COLOR_X24_8_32_FLOAT;
926
927	case PIPE_FORMAT_I32_FLOAT:
928	case PIPE_FORMAT_L32_FLOAT:
929	case PIPE_FORMAT_R32_FLOAT:
930	case PIPE_FORMAT_A32_FLOAT:
931	case PIPE_FORMAT_Z32_FLOAT:
932		return V_028C70_COLOR_32;
933
934	case PIPE_FORMAT_L16A16_UNORM:
935	case PIPE_FORMAT_L16A16_SNORM:
936	case PIPE_FORMAT_L16A16_FLOAT:
937	case PIPE_FORMAT_R16G16_SSCALED:
938	case PIPE_FORMAT_R16G16_UNORM:
939	case PIPE_FORMAT_R16G16_SNORM:
940	case PIPE_FORMAT_R16G16_UINT:
941	case PIPE_FORMAT_R16G16_SINT:
942	case PIPE_FORMAT_R16G16_FLOAT:
943		return V_028C70_COLOR_16_16;
944
945	case PIPE_FORMAT_R11G11B10_FLOAT:
946		return V_028C70_COLOR_10_11_11;
947
948	/* 64-bit buffers. */
949	case PIPE_FORMAT_R16G16B16A16_UINT:
950	case PIPE_FORMAT_R16G16B16A16_SINT:
951	case PIPE_FORMAT_R16G16B16A16_USCALED:
952	case PIPE_FORMAT_R16G16B16A16_SSCALED:
953	case PIPE_FORMAT_R16G16B16A16_UNORM:
954	case PIPE_FORMAT_R16G16B16A16_SNORM:
955	case PIPE_FORMAT_R16G16B16A16_FLOAT:
956	case PIPE_FORMAT_R16G16B16X16_UNORM:
957	case PIPE_FORMAT_R16G16B16X16_SNORM:
958	case PIPE_FORMAT_R16G16B16X16_FLOAT:
959	case PIPE_FORMAT_R16G16B16X16_UINT:
960	case PIPE_FORMAT_R16G16B16X16_SINT:
961		return V_028C70_COLOR_16_16_16_16;
962
963	case PIPE_FORMAT_L32A32_FLOAT:
964	case PIPE_FORMAT_L32A32_UINT:
965	case PIPE_FORMAT_L32A32_SINT:
966	case PIPE_FORMAT_R32G32_FLOAT:
967	case PIPE_FORMAT_R32G32_USCALED:
968	case PIPE_FORMAT_R32G32_SSCALED:
969	case PIPE_FORMAT_R32G32_SINT:
970	case PIPE_FORMAT_R32G32_UINT:
971		return V_028C70_COLOR_32_32;
972
973	/* 128-bit buffers. */
974	case PIPE_FORMAT_R32G32B32A32_SNORM:
975	case PIPE_FORMAT_R32G32B32A32_UNORM:
976	case PIPE_FORMAT_R32G32B32A32_SSCALED:
977	case PIPE_FORMAT_R32G32B32A32_USCALED:
978	case PIPE_FORMAT_R32G32B32A32_SINT:
979	case PIPE_FORMAT_R32G32B32A32_UINT:
980	case PIPE_FORMAT_R32G32B32A32_FLOAT:
981	case PIPE_FORMAT_R32G32B32X32_FLOAT:
982	case PIPE_FORMAT_R32G32B32X32_UINT:
983	case PIPE_FORMAT_R32G32B32X32_SINT:
984		return V_028C70_COLOR_32_32_32_32;
985
986	/* YUV buffers. */
987	case PIPE_FORMAT_UYVY:
988	case PIPE_FORMAT_YUYV:
989	/* 96-bit buffers. */
990	case PIPE_FORMAT_R32G32B32_FLOAT:
991	/* 8-bit buffers. */
992	case PIPE_FORMAT_L4A4_UNORM:
993	case PIPE_FORMAT_R4A4_UNORM:
994	case PIPE_FORMAT_A4R4_UNORM:
995	default:
996		return V_028C70_COLOR_INVALID; /* Unsupported. */
997	}
998}
999
1000static uint32_t si_translate_colorswap(enum pipe_format format)
1001{
1002	switch (format) {
1003	/* 8-bit buffers. */
1004	case PIPE_FORMAT_L4A4_UNORM:
1005	case PIPE_FORMAT_A4R4_UNORM:
1006		return V_028C70_SWAP_ALT;
1007
1008	case PIPE_FORMAT_A8_UNORM:
1009	case PIPE_FORMAT_A8_SNORM:
1010	case PIPE_FORMAT_A8_UINT:
1011	case PIPE_FORMAT_A8_SINT:
1012	case PIPE_FORMAT_R4A4_UNORM:
1013		return V_028C70_SWAP_ALT_REV;
1014	case PIPE_FORMAT_I8_UNORM:
1015	case PIPE_FORMAT_I8_SNORM:
1016	case PIPE_FORMAT_L8_UNORM:
1017	case PIPE_FORMAT_L8_SNORM:
1018	case PIPE_FORMAT_I8_UINT:
1019	case PIPE_FORMAT_I8_SINT:
1020	case PIPE_FORMAT_L8_UINT:
1021	case PIPE_FORMAT_L8_SINT:
1022	case PIPE_FORMAT_L8_SRGB:
1023	case PIPE_FORMAT_R8_UNORM:
1024	case PIPE_FORMAT_R8_SNORM:
1025	case PIPE_FORMAT_R8_UINT:
1026	case PIPE_FORMAT_R8_SINT:
1027		return V_028C70_SWAP_STD;
1028
1029	/* 16-bit buffers. */
1030	case PIPE_FORMAT_B5G6R5_UNORM:
1031		return V_028C70_SWAP_STD_REV;
1032
1033	case PIPE_FORMAT_B5G5R5A1_UNORM:
1034	case PIPE_FORMAT_B5G5R5X1_UNORM:
1035		return V_028C70_SWAP_ALT;
1036
1037	case PIPE_FORMAT_B4G4R4A4_UNORM:
1038	case PIPE_FORMAT_B4G4R4X4_UNORM:
1039		return V_028C70_SWAP_ALT;
1040
1041	case PIPE_FORMAT_Z16_UNORM:
1042		return V_028C70_SWAP_STD;
1043
1044	case PIPE_FORMAT_L8A8_UNORM:
1045	case PIPE_FORMAT_L8A8_SNORM:
1046	case PIPE_FORMAT_L8A8_UINT:
1047	case PIPE_FORMAT_L8A8_SINT:
1048		return V_028C70_SWAP_ALT;
1049	case PIPE_FORMAT_R8G8_SNORM:
1050	case PIPE_FORMAT_R8G8_UNORM:
1051	case PIPE_FORMAT_R8G8_UINT:
1052	case PIPE_FORMAT_R8G8_SINT:
1053		return V_028C70_SWAP_STD;
1054
1055	case PIPE_FORMAT_I16_UNORM:
1056	case PIPE_FORMAT_I16_SNORM:
1057	case PIPE_FORMAT_I16_FLOAT:
1058	case PIPE_FORMAT_L16_UNORM:
1059	case PIPE_FORMAT_L16_SNORM:
1060	case PIPE_FORMAT_L16_FLOAT:
1061	case PIPE_FORMAT_R16_UNORM:
1062	case PIPE_FORMAT_R16_SNORM:
1063	case PIPE_FORMAT_R16_UINT:
1064	case PIPE_FORMAT_R16_SINT:
1065	case PIPE_FORMAT_R16_FLOAT:
1066		return V_028C70_SWAP_STD;
1067
1068	case PIPE_FORMAT_A16_UNORM:
1069	case PIPE_FORMAT_A16_SNORM:
1070	case PIPE_FORMAT_A16_FLOAT:
1071		return V_028C70_SWAP_ALT_REV;
1072
1073	/* 32-bit buffers. */
1074	case PIPE_FORMAT_A8B8G8R8_SRGB:
1075		return V_028C70_SWAP_STD_REV;
1076	case PIPE_FORMAT_B8G8R8A8_SRGB:
1077		return V_028C70_SWAP_ALT;
1078
1079	case PIPE_FORMAT_B8G8R8A8_UNORM:
1080	case PIPE_FORMAT_B8G8R8X8_UNORM:
1081		return V_028C70_SWAP_ALT;
1082
1083	case PIPE_FORMAT_A8R8G8B8_UNORM:
1084	case PIPE_FORMAT_X8R8G8B8_UNORM:
1085		return V_028C70_SWAP_ALT_REV;
1086	case PIPE_FORMAT_R8G8B8A8_SNORM:
1087	case PIPE_FORMAT_R8G8B8A8_UNORM:
1088	case PIPE_FORMAT_R8G8B8A8_SSCALED:
1089	case PIPE_FORMAT_R8G8B8A8_USCALED:
1090	case PIPE_FORMAT_R8G8B8A8_SINT:
1091	case PIPE_FORMAT_R8G8B8A8_UINT:
1092	case PIPE_FORMAT_R8G8B8X8_UNORM:
1093	case PIPE_FORMAT_R8G8B8X8_SNORM:
1094	case PIPE_FORMAT_R8G8B8X8_SRGB:
1095	case PIPE_FORMAT_R8G8B8X8_UINT:
1096	case PIPE_FORMAT_R8G8B8X8_SINT:
1097		return V_028C70_SWAP_STD;
1098
1099	case PIPE_FORMAT_A8B8G8R8_UNORM:
1100	case PIPE_FORMAT_X8B8G8R8_UNORM:
1101	/* case PIPE_FORMAT_R8SG8SB8UX8U_NORM: */
1102		return V_028C70_SWAP_STD_REV;
1103
1104	case PIPE_FORMAT_Z24X8_UNORM:
1105	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1106		return V_028C70_SWAP_STD;
1107
1108	case PIPE_FORMAT_S8X24_UINT:
1109	case PIPE_FORMAT_X8Z24_UNORM:
1110	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1111		return V_028C70_SWAP_STD_REV;
1112
1113	case PIPE_FORMAT_R10G10B10A2_UNORM:
1114	case PIPE_FORMAT_R10G10B10X2_SNORM:
1115	case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
1116		return V_028C70_SWAP_STD;
1117
1118	case PIPE_FORMAT_B10G10R10A2_UNORM:
1119	case PIPE_FORMAT_B10G10R10A2_UINT:
1120	case PIPE_FORMAT_B10G10R10X2_UNORM:
1121		return V_028C70_SWAP_ALT;
1122
1123	case PIPE_FORMAT_R11G11B10_FLOAT:
1124	case PIPE_FORMAT_I32_FLOAT:
1125	case PIPE_FORMAT_L32_FLOAT:
1126	case PIPE_FORMAT_R32_FLOAT:
1127	case PIPE_FORMAT_R32_UINT:
1128	case PIPE_FORMAT_R32_SINT:
1129	case PIPE_FORMAT_Z32_FLOAT:
1130	case PIPE_FORMAT_R16G16_FLOAT:
1131	case PIPE_FORMAT_R16G16_UNORM:
1132	case PIPE_FORMAT_R16G16_SNORM:
1133	case PIPE_FORMAT_R16G16_UINT:
1134	case PIPE_FORMAT_R16G16_SINT:
1135		return V_028C70_SWAP_STD;
1136
1137	case PIPE_FORMAT_L16A16_UNORM:
1138	case PIPE_FORMAT_L16A16_SNORM:
1139	case PIPE_FORMAT_L16A16_FLOAT:
1140		return V_028C70_SWAP_ALT;
1141
1142	case PIPE_FORMAT_A32_FLOAT:
1143		return V_028C70_SWAP_ALT_REV;
1144
1145	/* 64-bit buffers. */
1146	case PIPE_FORMAT_R32G32_FLOAT:
1147	case PIPE_FORMAT_R32G32_UINT:
1148	case PIPE_FORMAT_R32G32_SINT:
1149	case PIPE_FORMAT_R16G16B16A16_UNORM:
1150	case PIPE_FORMAT_R16G16B16A16_SNORM:
1151	case PIPE_FORMAT_R16G16B16A16_USCALED:
1152	case PIPE_FORMAT_R16G16B16A16_SSCALED:
1153	case PIPE_FORMAT_R16G16B16A16_UINT:
1154	case PIPE_FORMAT_R16G16B16A16_SINT:
1155	case PIPE_FORMAT_R16G16B16A16_FLOAT:
1156	case PIPE_FORMAT_R16G16B16X16_UNORM:
1157	case PIPE_FORMAT_R16G16B16X16_SNORM:
1158	case PIPE_FORMAT_R16G16B16X16_FLOAT:
1159	case PIPE_FORMAT_R16G16B16X16_UINT:
1160	case PIPE_FORMAT_R16G16B16X16_SINT:
1161	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1162		return V_028C70_SWAP_STD;
1163
1164	case PIPE_FORMAT_L32A32_FLOAT:
1165	case PIPE_FORMAT_L32A32_UINT:
1166	case PIPE_FORMAT_L32A32_SINT:
1167		return V_028C70_SWAP_ALT;
1168
1169	/* 128-bit buffers. */
1170	case PIPE_FORMAT_R32G32B32A32_FLOAT:
1171	case PIPE_FORMAT_R32G32B32A32_SNORM:
1172	case PIPE_FORMAT_R32G32B32A32_UNORM:
1173	case PIPE_FORMAT_R32G32B32A32_SSCALED:
1174	case PIPE_FORMAT_R32G32B32A32_USCALED:
1175	case PIPE_FORMAT_R32G32B32A32_SINT:
1176	case PIPE_FORMAT_R32G32B32A32_UINT:
1177	case PIPE_FORMAT_R32G32B32X32_FLOAT:
1178	case PIPE_FORMAT_R32G32B32X32_UINT:
1179	case PIPE_FORMAT_R32G32B32X32_SINT:
1180		return V_028C70_SWAP_STD;
1181	default:
1182		R600_ERR("unsupported colorswap format %d\n", format);
1183		return ~0U;
1184	}
1185	return ~0U;
1186}
1187
1188static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
1189{
1190	if (R600_BIG_ENDIAN) {
1191		switch(colorformat) {
1192		/* 8-bit buffers. */
1193		case V_028C70_COLOR_8:
1194			return V_028C70_ENDIAN_NONE;
1195
1196		/* 16-bit buffers. */
1197		case V_028C70_COLOR_5_6_5:
1198		case V_028C70_COLOR_1_5_5_5:
1199		case V_028C70_COLOR_4_4_4_4:
1200		case V_028C70_COLOR_16:
1201		case V_028C70_COLOR_8_8:
1202			return V_028C70_ENDIAN_8IN16;
1203
1204		/* 32-bit buffers. */
1205		case V_028C70_COLOR_8_8_8_8:
1206		case V_028C70_COLOR_2_10_10_10:
1207		case V_028C70_COLOR_8_24:
1208		case V_028C70_COLOR_24_8:
1209		case V_028C70_COLOR_16_16:
1210			return V_028C70_ENDIAN_8IN32;
1211
1212		/* 64-bit buffers. */
1213		case V_028C70_COLOR_16_16_16_16:
1214			return V_028C70_ENDIAN_8IN16;
1215
1216		case V_028C70_COLOR_32_32:
1217			return V_028C70_ENDIAN_8IN32;
1218
1219		/* 128-bit buffers. */
1220		case V_028C70_COLOR_32_32_32_32:
1221			return V_028C70_ENDIAN_8IN32;
1222		default:
1223			return V_028C70_ENDIAN_NONE; /* Unsupported. */
1224		}
1225	} else {
1226		return V_028C70_ENDIAN_NONE;
1227	}
1228}
1229
1230/* Returns the size in bits of the widest component of a CB format */
1231static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
1232{
1233	switch(colorformat) {
1234	case V_028C70_COLOR_4_4_4_4:
1235		return 4;
1236
1237	case V_028C70_COLOR_1_5_5_5:
1238	case V_028C70_COLOR_5_5_5_1:
1239		return 5;
1240
1241	case V_028C70_COLOR_5_6_5:
1242		return 6;
1243
1244	case V_028C70_COLOR_8:
1245	case V_028C70_COLOR_8_8:
1246	case V_028C70_COLOR_8_8_8_8:
1247		return 8;
1248
1249	case V_028C70_COLOR_10_10_10_2:
1250	case V_028C70_COLOR_2_10_10_10:
1251		return 10;
1252
1253	case V_028C70_COLOR_10_11_11:
1254	case V_028C70_COLOR_11_11_10:
1255		return 11;
1256
1257	case V_028C70_COLOR_16:
1258	case V_028C70_COLOR_16_16:
1259	case V_028C70_COLOR_16_16_16_16:
1260		return 16;
1261
1262	case V_028C70_COLOR_8_24:
1263	case V_028C70_COLOR_24_8:
1264		return 24;
1265
1266	case V_028C70_COLOR_32:
1267	case V_028C70_COLOR_32_32:
1268	case V_028C70_COLOR_32_32_32_32:
1269	case V_028C70_COLOR_X24_8_32_FLOAT:
1270		return 32;
1271	}
1272
1273	assert(!"Unknown maximum component size");
1274	return 0;
1275}
1276
1277static uint32_t si_translate_dbformat(enum pipe_format format)
1278{
1279	switch (format) {
1280	case PIPE_FORMAT_Z16_UNORM:
1281		return V_028040_Z_16;
1282	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1283	case PIPE_FORMAT_X8Z24_UNORM:
1284	case PIPE_FORMAT_Z24X8_UNORM:
1285	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1286		return V_028040_Z_24; /* deprecated on SI */
1287	case PIPE_FORMAT_Z32_FLOAT:
1288	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1289		return V_028040_Z_32_FLOAT;
1290	default:
1291		return V_028040_Z_INVALID;
1292	}
1293}
1294
1295/*
1296 * Texture translation
1297 */
1298
1299static uint32_t si_translate_texformat(struct pipe_screen *screen,
1300				       enum pipe_format format,
1301				       const struct util_format_description *desc,
1302				       int first_non_void)
1303{
1304	struct r600_screen *rscreen = (struct r600_screen*)screen;
1305	bool enable_s3tc = rscreen->info.drm_minor >= 31;
1306	boolean uniform = TRUE;
1307	int i;
1308
1309	/* Colorspace (return non-RGB formats directly). */
1310	switch (desc->colorspace) {
1311	/* Depth stencil formats */
1312	case UTIL_FORMAT_COLORSPACE_ZS:
1313		switch (format) {
1314		case PIPE_FORMAT_Z16_UNORM:
1315			return V_008F14_IMG_DATA_FORMAT_16;
1316		case PIPE_FORMAT_X24S8_UINT:
1317		case PIPE_FORMAT_Z24X8_UNORM:
1318		case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1319			return V_008F14_IMG_DATA_FORMAT_8_24;
1320		case PIPE_FORMAT_X8Z24_UNORM:
1321		case PIPE_FORMAT_S8X24_UINT:
1322		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1323			return V_008F14_IMG_DATA_FORMAT_24_8;
1324		case PIPE_FORMAT_S8_UINT:
1325			return V_008F14_IMG_DATA_FORMAT_8;
1326		case PIPE_FORMAT_Z32_FLOAT:
1327			return V_008F14_IMG_DATA_FORMAT_32;
1328		case PIPE_FORMAT_X32_S8X24_UINT:
1329		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1330			return V_008F14_IMG_DATA_FORMAT_X24_8_32;
1331		default:
1332			goto out_unknown;
1333		}
1334
1335	case UTIL_FORMAT_COLORSPACE_YUV:
1336		goto out_unknown; /* TODO */
1337
1338	case UTIL_FORMAT_COLORSPACE_SRGB:
1339		if (desc->nr_channels != 4 && desc->nr_channels != 1)
1340			goto out_unknown;
1341		break;
1342
1343	default:
1344		break;
1345	}
1346
1347	if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
1348		if (!enable_s3tc)
1349			goto out_unknown;
1350
1351		switch (format) {
1352		case PIPE_FORMAT_RGTC1_SNORM:
1353		case PIPE_FORMAT_LATC1_SNORM:
1354		case PIPE_FORMAT_RGTC1_UNORM:
1355		case PIPE_FORMAT_LATC1_UNORM:
1356			return V_008F14_IMG_DATA_FORMAT_BC4;
1357		case PIPE_FORMAT_RGTC2_SNORM:
1358		case PIPE_FORMAT_LATC2_SNORM:
1359		case PIPE_FORMAT_RGTC2_UNORM:
1360		case PIPE_FORMAT_LATC2_UNORM:
1361			return V_008F14_IMG_DATA_FORMAT_BC5;
1362		default:
1363			goto out_unknown;
1364		}
1365	}
1366
1367	if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
1368
1369		if (!enable_s3tc)
1370			goto out_unknown;
1371
1372		if (!util_format_s3tc_enabled) {
1373			goto out_unknown;
1374		}
1375
1376		switch (format) {
1377		case PIPE_FORMAT_DXT1_RGB:
1378		case PIPE_FORMAT_DXT1_RGBA:
1379		case PIPE_FORMAT_DXT1_SRGB:
1380		case PIPE_FORMAT_DXT1_SRGBA:
1381			return V_008F14_IMG_DATA_FORMAT_BC1;
1382		case PIPE_FORMAT_DXT3_RGBA:
1383		case PIPE_FORMAT_DXT3_SRGBA:
1384			return V_008F14_IMG_DATA_FORMAT_BC2;
1385		case PIPE_FORMAT_DXT5_RGBA:
1386		case PIPE_FORMAT_DXT5_SRGBA:
1387			return V_008F14_IMG_DATA_FORMAT_BC3;
1388		default:
1389			goto out_unknown;
1390		}
1391	}
1392
1393	if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
1394		return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
1395	} else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
1396		return V_008F14_IMG_DATA_FORMAT_10_11_11;
1397	}
1398
1399	/* R8G8Bx_SNORM - TODO CxV8U8 */
1400
1401	/* See whether the components are of the same size. */
1402	for (i = 1; i < desc->nr_channels; i++) {
1403		uniform = uniform && desc->channel[0].size == desc->channel[i].size;
1404	}
1405
1406	/* Non-uniform formats. */
1407	if (!uniform) {
1408		switch(desc->nr_channels) {
1409		case 3:
1410			if (desc->channel[0].size == 5 &&
1411			    desc->channel[1].size == 6 &&
1412			    desc->channel[2].size == 5) {
1413				return V_008F14_IMG_DATA_FORMAT_5_6_5;
1414			}
1415			goto out_unknown;
1416		case 4:
1417			if (desc->channel[0].size == 5 &&
1418			    desc->channel[1].size == 5 &&
1419			    desc->channel[2].size == 5 &&
1420			    desc->channel[3].size == 1) {
1421				return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
1422			}
1423			if (desc->channel[0].size == 10 &&
1424			    desc->channel[1].size == 10 &&
1425			    desc->channel[2].size == 10 &&
1426			    desc->channel[3].size == 2) {
1427				return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
1428			}
1429			goto out_unknown;
1430		}
1431		goto out_unknown;
1432	}
1433
1434	if (first_non_void < 0 || first_non_void > 3)
1435		goto out_unknown;
1436
1437	/* uniform formats */
1438	switch (desc->channel[first_non_void].size) {
1439	case 4:
1440		switch (desc->nr_channels) {
1441#if 0 /* Not supported for render targets */
1442		case 2:
1443			return V_008F14_IMG_DATA_FORMAT_4_4;
1444#endif
1445		case 4:
1446			return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
1447		}
1448		break;
1449	case 8:
1450		switch (desc->nr_channels) {
1451		case 1:
1452			return V_008F14_IMG_DATA_FORMAT_8;
1453		case 2:
1454			return V_008F14_IMG_DATA_FORMAT_8_8;
1455		case 4:
1456			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
1457		}
1458		break;
1459	case 16:
1460		switch (desc->nr_channels) {
1461		case 1:
1462			return V_008F14_IMG_DATA_FORMAT_16;
1463		case 2:
1464			return V_008F14_IMG_DATA_FORMAT_16_16;
1465		case 4:
1466			return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
1467		}
1468		break;
1469	case 32:
1470		switch (desc->nr_channels) {
1471		case 1:
1472			return V_008F14_IMG_DATA_FORMAT_32;
1473		case 2:
1474			return V_008F14_IMG_DATA_FORMAT_32_32;
1475#if 0 /* Not supported for render targets */
1476		case 3:
1477			return V_008F14_IMG_DATA_FORMAT_32_32_32;
1478#endif
1479		case 4:
1480			return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
1481		}
1482	}
1483
1484out_unknown:
1485	/* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
1486	return ~0;
1487}
1488
1489static unsigned si_tex_wrap(unsigned wrap)
1490{
1491	switch (wrap) {
1492	default:
1493	case PIPE_TEX_WRAP_REPEAT:
1494		return V_008F30_SQ_TEX_WRAP;
1495	case PIPE_TEX_WRAP_CLAMP:
1496		return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
1497	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1498		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
1499	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1500		return V_008F30_SQ_TEX_CLAMP_BORDER;
1501	case PIPE_TEX_WRAP_MIRROR_REPEAT:
1502		return V_008F30_SQ_TEX_MIRROR;
1503	case PIPE_TEX_WRAP_MIRROR_CLAMP:
1504		return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
1505	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1506		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
1507	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1508		return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
1509	}
1510}
1511
1512static unsigned si_tex_filter(unsigned filter)
1513{
1514	switch (filter) {
1515	default:
1516	case PIPE_TEX_FILTER_NEAREST:
1517		return V_008F38_SQ_TEX_XY_FILTER_POINT;
1518	case PIPE_TEX_FILTER_LINEAR:
1519		return V_008F38_SQ_TEX_XY_FILTER_BILINEAR;
1520	}
1521}
1522
1523static unsigned si_tex_mipfilter(unsigned filter)
1524{
1525	switch (filter) {
1526	case PIPE_TEX_MIPFILTER_NEAREST:
1527		return V_008F38_SQ_TEX_Z_FILTER_POINT;
1528	case PIPE_TEX_MIPFILTER_LINEAR:
1529		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
1530	default:
1531	case PIPE_TEX_MIPFILTER_NONE:
1532		return V_008F38_SQ_TEX_Z_FILTER_NONE;
1533	}
1534}
1535
1536static unsigned si_tex_compare(unsigned compare)
1537{
1538	switch (compare) {
1539	default:
1540	case PIPE_FUNC_NEVER:
1541		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
1542	case PIPE_FUNC_LESS:
1543		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
1544	case PIPE_FUNC_EQUAL:
1545		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
1546	case PIPE_FUNC_LEQUAL:
1547		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
1548	case PIPE_FUNC_GREATER:
1549		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
1550	case PIPE_FUNC_NOTEQUAL:
1551		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
1552	case PIPE_FUNC_GEQUAL:
1553		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
1554	case PIPE_FUNC_ALWAYS:
1555		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
1556	}
1557}
1558
1559static unsigned si_tex_dim(unsigned dim, unsigned nr_samples)
1560{
1561	switch (dim) {
1562	default:
1563	case PIPE_TEXTURE_1D:
1564		return V_008F1C_SQ_RSRC_IMG_1D;
1565	case PIPE_TEXTURE_1D_ARRAY:
1566		return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
1567	case PIPE_TEXTURE_2D:
1568	case PIPE_TEXTURE_RECT:
1569		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
1570					V_008F1C_SQ_RSRC_IMG_2D;
1571	case PIPE_TEXTURE_2D_ARRAY:
1572		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
1573					V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
1574	case PIPE_TEXTURE_3D:
1575		return V_008F1C_SQ_RSRC_IMG_3D;
1576	case PIPE_TEXTURE_CUBE:
1577		return V_008F1C_SQ_RSRC_IMG_CUBE;
1578	}
1579}
1580
1581/*
1582 * Format support testing
1583 */
1584
1585static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
1586{
1587	return si_translate_texformat(screen, format, util_format_description(format),
1588				      util_format_get_first_non_void_channel(format)) != ~0U;
1589}
1590
1591static uint32_t si_translate_vertexformat(struct pipe_screen *screen,
1592					  enum pipe_format format,
1593					  const struct util_format_description *desc,
1594					  int first_non_void)
1595{
1596	unsigned type = desc->channel[first_non_void].type;
1597	int i;
1598
1599	if (type == UTIL_FORMAT_TYPE_FIXED)
1600		return V_008F0C_BUF_DATA_FORMAT_INVALID;
1601
1602	/* See whether the components are of the same size. */
1603	for (i = 0; i < desc->nr_channels; i++) {
1604		if (desc->channel[first_non_void].size != desc->channel[i].size)
1605			return V_008F0C_BUF_DATA_FORMAT_INVALID;
1606	}
1607
1608	switch (desc->channel[first_non_void].size) {
1609	case 8:
1610		switch (desc->nr_channels) {
1611		case 1:
1612			return V_008F0C_BUF_DATA_FORMAT_8;
1613		case 2:
1614			return V_008F0C_BUF_DATA_FORMAT_8_8;
1615		case 3:
1616		case 4:
1617			return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
1618		}
1619		break;
1620	case 16:
1621		switch (desc->nr_channels) {
1622		case 1:
1623			return V_008F0C_BUF_DATA_FORMAT_16;
1624		case 2:
1625			return V_008F0C_BUF_DATA_FORMAT_16_16;
1626		case 3:
1627		case 4:
1628			return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
1629		}
1630		break;
1631	case 32:
1632		if (type != UTIL_FORMAT_TYPE_FLOAT)
1633			return V_008F0C_BUF_DATA_FORMAT_INVALID;
1634
1635		switch (desc->nr_channels) {
1636		case 1:
1637			return V_008F0C_BUF_DATA_FORMAT_32;
1638		case 2:
1639			return V_008F0C_BUF_DATA_FORMAT_32_32;
1640		case 3:
1641			return V_008F0C_BUF_DATA_FORMAT_32_32_32;
1642		case 4:
1643			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
1644		}
1645		break;
1646	}
1647
1648	return V_008F0C_BUF_DATA_FORMAT_INVALID;
1649}
1650
1651static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format)
1652{
1653	const struct util_format_description *desc;
1654	int first_non_void;
1655	unsigned data_format;
1656
1657	desc = util_format_description(format);
1658	first_non_void = util_format_get_first_non_void_channel(format);
1659	data_format = si_translate_vertexformat(screen, format, desc, first_non_void);
1660	return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID;
1661}
1662
1663static bool si_is_colorbuffer_format_supported(enum pipe_format format)
1664{
1665	return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
1666		si_translate_colorswap(format) != ~0U;
1667}
1668
1669static bool si_is_zs_format_supported(enum pipe_format format)
1670{
1671	return si_translate_dbformat(format) != V_028040_Z_INVALID;
1672}
1673
1674boolean si_is_format_supported(struct pipe_screen *screen,
1675                               enum pipe_format format,
1676                               enum pipe_texture_target target,
1677                               unsigned sample_count,
1678                               unsigned usage)
1679{
1680	struct r600_screen *rscreen = (struct r600_screen *)screen;
1681	unsigned retval = 0;
1682
1683	if (target >= PIPE_MAX_TEXTURE_TYPES) {
1684		R600_ERR("r600: unsupported texture type %d\n", target);
1685		return FALSE;
1686	}
1687
1688	if (!util_format_is_supported(format, usage))
1689		return FALSE;
1690
1691	if (sample_count > 1) {
1692		if (HAVE_LLVM < 0x0304 || rscreen->chip_class != SI)
1693			return FALSE;
1694
1695		switch (sample_count) {
1696		case 2:
1697		case 4:
1698		case 8:
1699			break;
1700		default:
1701			return FALSE;
1702		}
1703	}
1704
1705	if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
1706	    si_is_sampler_format_supported(screen, format)) {
1707		retval |= PIPE_BIND_SAMPLER_VIEW;
1708	}
1709
1710	if ((usage & (PIPE_BIND_RENDER_TARGET |
1711		      PIPE_BIND_DISPLAY_TARGET |
1712		      PIPE_BIND_SCANOUT |
1713		      PIPE_BIND_SHARED)) &&
1714	    si_is_colorbuffer_format_supported(format)) {
1715		retval |= usage &
1716			  (PIPE_BIND_RENDER_TARGET |
1717			   PIPE_BIND_DISPLAY_TARGET |
1718			   PIPE_BIND_SCANOUT |
1719			   PIPE_BIND_SHARED);
1720	}
1721
1722	if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
1723	    si_is_zs_format_supported(format)) {
1724		retval |= PIPE_BIND_DEPTH_STENCIL;
1725	}
1726
1727	if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
1728	    si_is_vertex_format_supported(screen, format)) {
1729		retval |= PIPE_BIND_VERTEX_BUFFER;
1730	}
1731
1732	if (usage & PIPE_BIND_TRANSFER_READ)
1733		retval |= PIPE_BIND_TRANSFER_READ;
1734	if (usage & PIPE_BIND_TRANSFER_WRITE)
1735		retval |= PIPE_BIND_TRANSFER_WRITE;
1736
1737	return retval == usage;
1738}
1739
1740static unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil)
1741{
1742	unsigned tile_mode_index = 0;
1743
1744	if (stencil) {
1745		tile_mode_index = rtex->surface.stencil_tiling_index[level];
1746	} else {
1747		tile_mode_index = rtex->surface.tiling_index[level];
1748	}
1749	return tile_mode_index;
1750}
1751
1752/*
1753 * framebuffer handling
1754 */
1755
1756static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
1757		  const struct pipe_framebuffer_state *state, int cb)
1758{
1759	struct r600_texture *rtex;
1760	struct r600_surface *surf;
1761	unsigned level = state->cbufs[cb]->u.tex.level;
1762	unsigned pitch, slice;
1763	unsigned color_info, color_attrib;
1764	unsigned tile_mode_index;
1765	unsigned format, swap, ntype, endian;
1766	uint64_t offset;
1767	const struct util_format_description *desc;
1768	int i;
1769	unsigned blend_clamp = 0, blend_bypass = 0;
1770	unsigned max_comp_size;
1771
1772	surf = (struct r600_surface *)state->cbufs[cb];
1773	rtex = (struct r600_texture*)state->cbufs[cb]->texture;
1774
1775	offset = rtex->surface.level[level].offset;
1776	if (rtex->surface.level[level].mode < RADEON_SURF_MODE_1D) {
1777		offset += rtex->surface.level[level].slice_size *
1778			  state->cbufs[cb]->u.tex.first_layer;
1779	}
1780	pitch = (rtex->surface.level[level].nblk_x) / 8 - 1;
1781	slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64;
1782	if (slice) {
1783		slice = slice - 1;
1784	}
1785
1786	tile_mode_index = si_tile_mode_index(rtex, level, false);
1787
1788	desc = util_format_description(surf->base.format);
1789	for (i = 0; i < 4; i++) {
1790		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
1791			break;
1792		}
1793	}
1794	if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
1795		ntype = V_028C70_NUMBER_FLOAT;
1796	} else {
1797		ntype = V_028C70_NUMBER_UNORM;
1798		if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
1799			ntype = V_028C70_NUMBER_SRGB;
1800		else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
1801			if (desc->channel[i].pure_integer) {
1802				ntype = V_028C70_NUMBER_SINT;
1803			} else {
1804				assert(desc->channel[i].normalized);
1805				ntype = V_028C70_NUMBER_SNORM;
1806			}
1807		} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
1808			if (desc->channel[i].pure_integer) {
1809				ntype = V_028C70_NUMBER_UINT;
1810			} else {
1811				assert(desc->channel[i].normalized);
1812				ntype = V_028C70_NUMBER_UNORM;
1813			}
1814		}
1815	}
1816
1817	format = si_translate_colorformat(surf->base.format);
1818	if (format == V_028C70_COLOR_INVALID) {
1819		R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
1820	}
1821	assert(format != V_028C70_COLOR_INVALID);
1822	swap = si_translate_colorswap(surf->base.format);
1823	if (rtex->resource.b.b.usage == PIPE_USAGE_STAGING) {
1824		endian = V_028C70_ENDIAN_NONE;
1825	} else {
1826		endian = si_colorformat_endian_swap(format);
1827	}
1828
1829	/* blend clamp should be set for all NORM/SRGB types */
1830	if (ntype == V_028C70_NUMBER_UNORM ||
1831	    ntype == V_028C70_NUMBER_SNORM ||
1832	    ntype == V_028C70_NUMBER_SRGB)
1833		blend_clamp = 1;
1834
1835	/* set blend bypass according to docs if SINT/UINT or
1836	   8/24 COLOR variants */
1837	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
1838	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
1839	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
1840		blend_clamp = 0;
1841		blend_bypass = 1;
1842	}
1843
1844	color_info = S_028C70_FORMAT(format) |
1845		S_028C70_COMP_SWAP(swap) |
1846		S_028C70_BLEND_CLAMP(blend_clamp) |
1847		S_028C70_BLEND_BYPASS(blend_bypass) |
1848		S_028C70_NUMBER_TYPE(ntype) |
1849		S_028C70_ENDIAN(endian);
1850
1851	color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) |
1852		S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1);
1853
1854	if (rtex->resource.b.b.nr_samples > 1) {
1855		unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
1856
1857		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
1858				S_028C74_NUM_FRAGMENTS(log_samples);
1859
1860		if (rtex->fmask.size) {
1861			color_info |= S_028C70_COMPRESSION(1);
1862			unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);
1863
1864			/* due to a bug in the hw, FMASK_BANK_HEIGHT must be set on SI too */
1865			color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index) |
1866					S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
1867		}
1868	}
1869
1870	if (rtex->cmask.size) {
1871		color_info |= S_028C70_FAST_CLEAR(1);
1872	}
1873
1874	offset += r600_resource_va(rctx->context.screen, state->cbufs[cb]->texture);
1875	offset >>= 8;
1876
1877	/* FIXME handle enabling of CB beyond BASE8 which has different offset */
1878	si_pm4_add_bo(pm4, &rtex->resource, RADEON_USAGE_READWRITE);
1879	si_pm4_set_reg(pm4, R_028C60_CB_COLOR0_BASE + cb * 0x3C, offset);
1880	si_pm4_set_reg(pm4, R_028C64_CB_COLOR0_PITCH + cb * 0x3C, S_028C64_TILE_MAX(pitch));
1881	si_pm4_set_reg(pm4, R_028C68_CB_COLOR0_SLICE + cb * 0x3C, S_028C68_TILE_MAX(slice));
1882
1883	if (rtex->surface.level[level].mode < RADEON_SURF_MODE_1D) {
1884		si_pm4_set_reg(pm4, R_028C6C_CB_COLOR0_VIEW + cb * 0x3C, 0x00000000);
1885	} else {
1886		si_pm4_set_reg(pm4, R_028C6C_CB_COLOR0_VIEW + cb * 0x3C,
1887			       S_028C6C_SLICE_START(state->cbufs[cb]->u.tex.first_layer) |
1888			       S_028C6C_SLICE_MAX(state->cbufs[cb]->u.tex.last_layer));
1889	}
1890	si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + cb * 0x3C, color_info);
1891	si_pm4_set_reg(pm4, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, color_attrib);
1892
1893	if (rtex->cmask.size) {
1894		si_pm4_set_reg(pm4, R_028C7C_CB_COLOR0_CMASK + cb * 0x3C,
1895			       offset + (rtex->cmask.offset >> 8));
1896		si_pm4_set_reg(pm4, R_028C80_CB_COLOR0_CMASK_SLICE + cb * 0x3C,
1897			       S_028C80_TILE_MAX(rtex->cmask.slice_tile_max));
1898	}
1899	if (rtex->fmask.size) {
1900		si_pm4_set_reg(pm4, R_028C84_CB_COLOR0_FMASK + cb * 0x3C,
1901			       offset + (rtex->fmask.offset >> 8));
1902		si_pm4_set_reg(pm4, R_028C88_CB_COLOR0_FMASK_SLICE + cb * 0x3C,
1903			       S_028C88_TILE_MAX(rtex->fmask.slice_tile_max));
1904	}
1905
1906	/* set CB_COLOR1_INFO for possible dual-src blending */
1907	if (state->nr_cbufs == 1) {
1908		assert(cb == 0);
1909		si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, color_info);
1910	}
1911
1912	/* Determine pixel shader export format */
1913	max_comp_size = si_colorformat_max_comp_size(format);
1914	if (ntype == V_028C70_NUMBER_SRGB ||
1915	    ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
1916	     max_comp_size <= 10) ||
1917	    (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) {
1918		rctx->export_16bpc |= 1 << cb;
1919		/* set SPI_SHADER_COL_FORMAT for possible dual-src blending */
1920		if (state->nr_cbufs == 1)
1921			rctx->export_16bpc |= 1 << 1;
1922	}
1923}
1924
1925static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4,
1926		  const struct pipe_framebuffer_state *state)
1927{
1928	struct r600_screen *rscreen = rctx->screen;
1929	struct r600_texture *rtex;
1930	struct r600_surface *surf;
1931	unsigned level, pitch, slice, format, tile_mode_index, array_mode;
1932	unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config;
1933	uint32_t z_info, s_info, db_depth_info;
1934	uint64_t z_offs, s_offs;
1935
1936	if (state->zsbuf == NULL) {
1937		si_pm4_set_reg(pm4, R_028040_DB_Z_INFO, S_028040_FORMAT(V_028040_Z_INVALID));
1938		si_pm4_set_reg(pm4, R_028044_DB_STENCIL_INFO, S_028044_FORMAT(V_028044_STENCIL_INVALID));
1939		return;
1940	}
1941
1942	surf = (struct r600_surface *)state->zsbuf;
1943	level = surf->base.u.tex.level;
1944	rtex = (struct r600_texture*)surf->base.texture;
1945
1946	format = si_translate_dbformat(rtex->resource.b.b.format);
1947
1948	if (format == V_028040_Z_INVALID) {
1949		R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
1950	}
1951	assert(format != V_028040_Z_INVALID);
1952
1953	s_offs = z_offs = r600_resource_va(rctx->context.screen, surf->base.texture);
1954	z_offs += rtex->surface.level[level].offset;
1955	s_offs += rtex->surface.stencil_level[level].offset;
1956
1957	z_offs >>= 8;
1958	s_offs >>= 8;
1959
1960	pitch = (rtex->surface.level[level].nblk_x / 8) - 1;
1961	slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64;
1962	if (slice) {
1963		slice = slice - 1;
1964	}
1965
1966	db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
1967
1968	z_info = S_028040_FORMAT(format);
1969	if (rtex->resource.b.b.nr_samples > 1) {
1970		z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
1971	}
1972
1973	if (rtex->surface.flags & RADEON_SURF_SBUFFER)
1974		s_info = S_028044_FORMAT(V_028044_STENCIL_8);
1975	else
1976		s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
1977
1978	if (rctx->chip_class >= CIK) {
1979		switch (rtex->surface.level[level].mode) {
1980		case RADEON_SURF_MODE_2D:
1981			array_mode = V_02803C_ARRAY_2D_TILED_THIN1;
1982			break;
1983		case RADEON_SURF_MODE_1D:
1984		case RADEON_SURF_MODE_LINEAR_ALIGNED:
1985		case RADEON_SURF_MODE_LINEAR:
1986		default:
1987			array_mode = V_02803C_ARRAY_1D_TILED_THIN1;
1988			break;
1989		}
1990		tile_split = rtex->surface.tile_split;
1991		stile_split = rtex->surface.stencil_tile_split;
1992		macro_aspect = rtex->surface.mtilea;
1993		bankw = rtex->surface.bankw;
1994		bankh = rtex->surface.bankh;
1995		tile_split = cik_tile_split(tile_split);
1996		stile_split = cik_tile_split(stile_split);
1997		macro_aspect = cik_macro_tile_aspect(macro_aspect);
1998		bankw = cik_bank_wh(bankw);
1999		bankh = cik_bank_wh(bankh);
2000		nbanks = cik_num_banks(rscreen->tiling_info.num_banks);
2001		pipe_config = cik_db_pipe_config(rscreen->info.r600_num_tile_pipes,
2002						 rscreen->info.r600_num_backends);
2003
2004		db_depth_info |= S_02803C_ARRAY_MODE(array_mode) |
2005			S_02803C_PIPE_CONFIG(pipe_config) |
2006			S_02803C_BANK_WIDTH(bankw) |
2007			S_02803C_BANK_HEIGHT(bankh) |
2008			S_02803C_MACRO_TILE_ASPECT(macro_aspect) |
2009			S_02803C_NUM_BANKS(nbanks);
2010		z_info |= S_028040_TILE_SPLIT(tile_split);
2011		s_info |= S_028044_TILE_SPLIT(stile_split);
2012	} else {
2013		tile_mode_index = si_tile_mode_index(rtex, level, false);
2014		z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2015		tile_mode_index = si_tile_mode_index(rtex, level, true);
2016		s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2017	}
2018
2019	si_pm4_set_reg(pm4, R_028008_DB_DEPTH_VIEW,
2020		       S_028008_SLICE_START(state->zsbuf->u.tex.first_layer) |
2021		       S_028008_SLICE_MAX(state->zsbuf->u.tex.last_layer));
2022
2023	si_pm4_set_reg(pm4, R_02803C_DB_DEPTH_INFO, db_depth_info);
2024	si_pm4_set_reg(pm4, R_028040_DB_Z_INFO, z_info);
2025	si_pm4_set_reg(pm4, R_028044_DB_STENCIL_INFO, s_info);
2026
2027	si_pm4_add_bo(pm4, &rtex->resource, RADEON_USAGE_READWRITE);
2028	si_pm4_set_reg(pm4, R_028048_DB_Z_READ_BASE, z_offs);
2029	si_pm4_set_reg(pm4, R_02804C_DB_STENCIL_READ_BASE, s_offs);
2030	si_pm4_set_reg(pm4, R_028050_DB_Z_WRITE_BASE, z_offs);
2031	si_pm4_set_reg(pm4, R_028054_DB_STENCIL_WRITE_BASE, s_offs);
2032
2033	si_pm4_set_reg(pm4, R_028058_DB_DEPTH_SIZE, S_028058_PITCH_TILE_MAX(pitch));
2034	si_pm4_set_reg(pm4, R_02805C_DB_DEPTH_SLICE, S_02805C_SLICE_TILE_MAX(slice));
2035}
2036
/*
 * Pack four (x, y) sample positions into one 32-bit value.
 *
 * Each coordinate is reduced to its low 4 bits (two's complement), with s0x
 * in bits 3:0 and s3y in bits 31:28. Every nibble is converted to uint32_t
 * before it is shifted, so a negative coordinate never shifts a set bit into
 * the sign bit of a signed int (which is undefined behaviour in C).
 */
#define FILL_SREG_NIBBLE(v, shift) (((uint32_t)(v) & 0xfu) << (shift))
#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y)  \
	(FILL_SREG_NIBBLE(s0x, 0)  | FILL_SREG_NIBBLE(s0y, 4)  |  \
	 FILL_SREG_NIBBLE(s1x, 8)  | FILL_SREG_NIBBLE(s1y, 12) |  \
	 FILL_SREG_NIBBLE(s2x, 16) | FILL_SREG_NIBBLE(s2y, 20) |  \
	 FILL_SREG_NIBBLE(s3x, 24) | FILL_SREG_NIBBLE(s3y, 28))

/* The tables below are only ever read, so they are const. */

/* 2xMSAA
 * There are two locations (-4, 4), (4, -4). */
static const uint32_t sample_locs_2x[] = {
	FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
	FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
	FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
	FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
};
static const unsigned max_dist_2x = 4;
/* 4xMSAA
 * There are 4 locations: (-2, -2), (2, 2), (-6, 6), (6, -6). */
static const uint32_t sample_locs_4x[] = {
	FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
	FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
	FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
	FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
};
static const unsigned max_dist_4x = 6;
/* Cayman/SI 8xMSAA
 * Entries 0-3 hold samples 0-3, entries 4-7 hold samples 4-7. */
static const uint32_t cm_sample_locs_8x[] = {
	FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
	FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
	FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
	FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
	FILL_SREG( 6,  0, 0,  0, -5, 3,  4,  4),
	FILL_SREG( 6,  0, 0,  0, -5, 3,  4,  4),
	FILL_SREG( 6,  0, 0,  0, -5, 3,  4,  4),
	FILL_SREG( 6,  0, 0,  0, -5, 3,  4,  4),
};
static const unsigned cm_max_dist_8x = 8;
/* Cayman/SI 16xMSAA
 * Each group of four identical entries holds four consecutive samples. */
static const uint32_t cm_sample_locs_16x[] = {
	FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
	FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
	FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
	FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
	FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
	FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
	FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
	FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
	FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
	FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
	FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
	FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
	FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
	FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
	FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
	FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
};
static const unsigned cm_max_dist_16x = 8;
2093
2094static void si_get_sample_position(struct pipe_context *ctx,
2095				   unsigned sample_count,
2096				   unsigned sample_index,
2097				   float *out_value)
2098{
2099	int offset, index;
2100	struct {
2101		int idx:4;
2102	} val;
2103	switch (sample_count) {
2104	case 1:
2105	default:
2106		out_value[0] = out_value[1] = 0.5;
2107		break;
2108	case 2:
2109		offset = 4 * (sample_index * 2);
2110		val.idx = (sample_locs_2x[0] >> offset) & 0xf;
2111		out_value[0] = (float)(val.idx + 8) / 16.0f;
2112		val.idx = (sample_locs_2x[0] >> (offset + 4)) & 0xf;
2113		out_value[1] = (float)(val.idx + 8) / 16.0f;
2114		break;
2115	case 4:
2116		offset = 4 * (sample_index * 2);
2117		val.idx = (sample_locs_4x[0] >> offset) & 0xf;
2118		out_value[0] = (float)(val.idx + 8) / 16.0f;
2119		val.idx = (sample_locs_4x[0] >> (offset + 4)) & 0xf;
2120		out_value[1] = (float)(val.idx + 8) / 16.0f;
2121		break;
2122	case 8:
2123		offset = 4 * (sample_index % 4 * 2);
2124		index = (sample_index / 4) * 4;
2125		val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf;
2126		out_value[0] = (float)(val.idx + 8) / 16.0f;
2127		val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
2128		out_value[1] = (float)(val.idx + 8) / 16.0f;
2129		break;
2130	case 16:
2131		offset = 4 * (sample_index % 4 * 2);
2132		index = (sample_index / 4) * 4;
2133		val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
2134		out_value[0] = (float)(val.idx + 8) / 16.0f;
2135		val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
2136		out_value[1] = (float)(val.idx + 8) / 16.0f;
2137		break;
2138	}
2139}
2140
2141static void si_set_msaa_state(struct r600_context *rctx, struct si_pm4_state *pm4, int nr_samples)
2142{
2143	unsigned max_dist = 0;
2144
2145	switch (nr_samples) {
2146	default:
2147		nr_samples = 0;
2148		break;
2149	case 2:
2150		si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_2x[0]);
2151		si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_2x[1]);
2152		si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_2x[2]);
2153		si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_2x[3]);
2154		max_dist = max_dist_2x;
2155		break;
2156	case 4:
2157		si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_4x[0]);
2158		si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_4x[1]);
2159		si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_4x[2]);
2160		si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_4x[3]);
2161		max_dist = max_dist_4x;
2162		break;
2163	case 8:
2164		si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, cm_sample_locs_8x[0]);
2165		si_pm4_set_reg(pm4, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, cm_sample_locs_8x[4]);
2166		si_pm4_set_reg(pm4, R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, 0);
2167		si_pm4_set_reg(pm4, R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, 0);
2168		si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, cm_sample_locs_8x[1]);
2169		si_pm4_set_reg(pm4, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, cm_sample_locs_8x[5]);
2170		si_pm4_set_reg(pm4, R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, 0);
2171		si_pm4_set_reg(pm4, R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, 0);
2172		si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, cm_sample_locs_8x[2]);
2173		si_pm4_set_reg(pm4, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, cm_sample_locs_8x[6]);
2174		si_pm4_set_reg(pm4, R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, 0);
2175		si_pm4_set_reg(pm4, R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, 0);
2176		si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, cm_sample_locs_8x[3]);
2177		si_pm4_set_reg(pm4, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, cm_sample_locs_8x[7]);
2178		max_dist = cm_max_dist_8x;
2179		break;
2180	case 16:
2181		si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, cm_sample_locs_16x[0]);
2182		si_pm4_set_reg(pm4, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, cm_sample_locs_16x[4]);
2183		si_pm4_set_reg(pm4, R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, cm_sample_locs_16x[8]);
2184		si_pm4_set_reg(pm4, R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, cm_sample_locs_16x[12]);
2185		si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, cm_sample_locs_16x[1]);
2186		si_pm4_set_reg(pm4, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, cm_sample_locs_16x[5]);
2187		si_pm4_set_reg(pm4, R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, cm_sample_locs_16x[9]);
2188		si_pm4_set_reg(pm4, R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, cm_sample_locs_16x[13]);
2189		si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, cm_sample_locs_16x[2]);
2190		si_pm4_set_reg(pm4, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, cm_sample_locs_16x[6]);
2191		si_pm4_set_reg(pm4, R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, cm_sample_locs_16x[10]);
2192		si_pm4_set_reg(pm4, R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, cm_sample_locs_16x[14]);
2193		si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, cm_sample_locs_16x[3]);
2194		si_pm4_set_reg(pm4, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, cm_sample_locs_16x[7]);
2195		si_pm4_set_reg(pm4, R_028C30_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2, cm_sample_locs_16x[11]);
2196		si_pm4_set_reg(pm4, R_028C34_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3, cm_sample_locs_16x[15]);
2197		max_dist = cm_max_dist_16x;
2198		break;
2199	}
2200
2201	if (nr_samples > 1) {
2202		unsigned log_samples = util_logbase2(nr_samples);
2203
2204		si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL,
2205			       S_028BDC_LAST_PIXEL(1) |
2206			       S_028BDC_EXPAND_LINE_WIDTH(1));
2207		si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG,
2208			       S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
2209			       S_028BE0_MAX_SAMPLE_DIST(max_dist) |
2210			       S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples));
2211
2212		si_pm4_set_reg(pm4, R_028804_DB_EQAA,
2213			       S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
2214			       S_028804_PS_ITER_SAMPLES(log_samples) |
2215			       S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
2216			       S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
2217			       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
2218			       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
2219	} else {
2220		si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL, S_028BDC_LAST_PIXEL(1));
2221		si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 0);
2222
2223		si_pm4_set_reg(pm4, R_028804_DB_EQAA,
2224			       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
2225			       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
2226	}
2227}
2228
2229static void si_set_framebuffer_state(struct pipe_context *ctx,
2230				     const struct pipe_framebuffer_state *state)
2231{
2232	struct r600_context *rctx = (struct r600_context *)ctx;
2233	struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
2234	uint32_t tl, br;
2235	int tl_x, tl_y, br_x, br_y, nr_samples, i;
2236
2237	if (pm4 == NULL)
2238		return;
2239
2240	si_pm4_inval_fb_cache(pm4, state->nr_cbufs);
2241	rctx->flush_and_inv_cb_meta = true;
2242
2243	if (state->zsbuf)
2244		si_pm4_inval_zsbuf_cache(pm4);
2245
2246	util_copy_framebuffer_state(&rctx->framebuffer, state);
2247
2248	/* build states */
2249	rctx->export_16bpc = 0;
2250	rctx->fb_compressed_cb_mask = 0;
2251	for (i = 0; i < state->nr_cbufs; i++) {
2252		struct r600_texture *rtex =
2253			(struct r600_texture*)state->cbufs[i]->texture;
2254
2255		si_cb(rctx, pm4, state, i);
2256
2257		if (rtex->fmask.size || rtex->cmask.size) {
2258			rctx->fb_compressed_cb_mask |= 1 << i;
2259		}
2260	}
2261	for (; i < 8; i++) {
2262		si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + i * 0x3C,
2263			       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
2264	}
2265
2266	assert(!(rctx->export_16bpc & ~0xff));
2267	si_db(rctx, pm4, state);
2268
2269	tl_x = 0;
2270	tl_y = 0;
2271	br_x = state->width;
2272	br_y = state->height;
2273
2274	tl = S_028240_TL_X(tl_x) | S_028240_TL_Y(tl_y);
2275	br = S_028244_BR_X(br_x) | S_028244_BR_Y(br_y);
2276
2277	si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, tl);
2278	si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, br);
2279	si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl);
2280	si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR, br);
2281	si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, tl);
2282	si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, br);
2283	si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, tl);
2284	si_pm4_set_reg(pm4, R_028208_PA_SC_WINDOW_SCISSOR_BR, br);
2285	si_pm4_set_reg(pm4, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000);
2286	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
2287
2288	if (state->nr_cbufs)
2289		nr_samples = state->cbufs[0]->texture->nr_samples;
2290	else if (state->zsbuf)
2291		nr_samples = state->zsbuf->texture->nr_samples;
2292	else
2293		nr_samples = 0;
2294
2295	si_set_msaa_state(rctx, pm4, nr_samples);
2296	rctx->fb_log_samples = util_logbase2(nr_samples);
2297	rctx->fb_cb0_is_integer = state->nr_cbufs &&
2298				  util_format_is_pure_integer(state->cbufs[0]->format);
2299
2300	si_pm4_set_state(rctx, framebuffer, pm4);
2301	si_update_fb_rs_state(rctx);
2302	si_update_fb_blend_state(rctx);
2303}
2304
2305/*
2306 * shaders
2307 */
2308
2309/* Compute the key for the hw shader variant */
2310static INLINE void si_shader_selector_key(struct pipe_context *ctx,
2311					  struct si_pipe_shader_selector *sel,
2312					  union si_shader_key *key)
2313{
2314	struct r600_context *rctx = (struct r600_context *)ctx;
2315	memset(key, 0, sizeof(*key));
2316
2317	if (sel->type == PIPE_SHADER_VERTEX) {
2318		unsigned i;
2319		if (!rctx->vertex_elements)
2320			return;
2321
2322		for (i = 0; i < rctx->vertex_elements->count; ++i)
2323			key->vs.instance_divisors[i] = rctx->vertex_elements->elements[i].instance_divisor;
2324
2325		if (rctx->queued.named.rasterizer->clip_plane_enable & 0xf0)
2326			key->vs.ucps_enabled |= 0x2;
2327		if (rctx->queued.named.rasterizer->clip_plane_enable & 0xf)
2328			key->vs.ucps_enabled |= 0x1;
2329	} else if (sel->type == PIPE_SHADER_FRAGMENT) {
2330		if (sel->fs_write_all)
2331			key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs;
2332		key->ps.export_16bpc = rctx->export_16bpc;
2333
2334		if (rctx->queued.named.rasterizer) {
2335			key->ps.color_two_side = rctx->queued.named.rasterizer->two_side;
2336			key->ps.flatshade = rctx->queued.named.rasterizer->flatshade;
2337
2338			if (rctx->queued.named.blend) {
2339				key->ps.alpha_to_one = rctx->queued.named.blend->alpha_to_one &&
2340						       rctx->queued.named.rasterizer->multisample_enable &&
2341						       !rctx->fb_cb0_is_integer;
2342			}
2343		}
2344		if (rctx->queued.named.dsa) {
2345			key->ps.alpha_func = rctx->queued.named.dsa->alpha_func;
2346			key->ps.alpha_ref = rctx->queued.named.dsa->alpha_ref;
2347		} else {
2348			key->ps.alpha_func = PIPE_FUNC_ALWAYS;
2349		}
2350	}
2351}
2352
2353/* Select the hw shader variant depending on the current state.
2354 * (*dirty) is set to 1 if current variant was changed */
2355int si_shader_select(struct pipe_context *ctx,
2356		     struct si_pipe_shader_selector *sel,
2357		     unsigned *dirty)
2358{
2359	union si_shader_key key;
2360	struct si_pipe_shader * shader = NULL;
2361	int r;
2362
2363	si_shader_selector_key(ctx, sel, &key);
2364
2365	/* Check if we don't need to change anything.
2366	 * This path is also used for most shaders that don't need multiple
2367	 * variants, it will cost just a computation of the key and this
2368	 * test. */
2369	if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) {
2370		return 0;
2371	}
2372
2373	/* lookup if we have other variants in the list */
2374	if (sel->num_shaders > 1) {
2375		struct si_pipe_shader *p = sel->current, *c = p->next_variant;
2376
2377		while (c && memcmp(&c->key, &key, sizeof(key)) != 0) {
2378			p = c;
2379			c = c->next_variant;
2380		}
2381
2382		if (c) {
2383			p->next_variant = c->next_variant;
2384			shader = c;
2385		}
2386	}
2387
2388	if (unlikely(!shader)) {
2389		shader = CALLOC(1, sizeof(struct si_pipe_shader));
2390		shader->selector = sel;
2391		shader->key = key;
2392
2393		r = si_pipe_shader_create(ctx, shader);
2394		if (unlikely(r)) {
2395			R600_ERR("Failed to build shader variant (type=%u) %d\n",
2396				 sel->type, r);
2397			sel->current = NULL;
2398			FREE(shader);
2399			return r;
2400		}
2401
2402		/* We don't know the value of fs_write_all property until we built
2403		 * at least one variant, so we may need to recompute the key (include
2404		 * rctx->framebuffer.nr_cbufs) after building first variant. */
2405		if (sel->type == PIPE_SHADER_FRAGMENT &&
2406		    sel->num_shaders == 0 &&
2407		    shader->shader.fs_write_all) {
2408			sel->fs_write_all = 1;
2409			si_shader_selector_key(ctx, sel, &shader->key);
2410		}
2411
2412		sel->num_shaders++;
2413	}
2414
2415	if (dirty)
2416		*dirty = 1;
2417
2418	shader->next_variant = sel->current;
2419	sel->current = shader;
2420
2421	return 0;
2422}
2423
2424static void *si_create_shader_state(struct pipe_context *ctx,
2425				    const struct pipe_shader_state *state,
2426				    unsigned pipe_shader_type)
2427{
2428	struct si_pipe_shader_selector *sel = CALLOC_STRUCT(si_pipe_shader_selector);
2429	int r;
2430
2431	sel->type = pipe_shader_type;
2432	sel->tokens = tgsi_dup_tokens(state->tokens);
2433	sel->so = state->stream_output;
2434
2435	r = si_shader_select(ctx, sel, NULL);
2436	if (r) {
2437	    free(sel);
2438	    return NULL;
2439	}
2440
2441	return sel;
2442}
2443
2444static void *si_create_fs_state(struct pipe_context *ctx,
2445				const struct pipe_shader_state *state)
2446{
2447	return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT);
2448}
2449
2450static void *si_create_vs_state(struct pipe_context *ctx,
2451				const struct pipe_shader_state *state)
2452{
2453	return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
2454}
2455
2456static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
2457{
2458	struct r600_context *rctx = (struct r600_context *)ctx;
2459	struct si_pipe_shader_selector *sel = state;
2460
2461	if (rctx->vs_shader == sel)
2462		return;
2463
2464	rctx->vs_shader = sel;
2465
2466	if (sel && sel->current)
2467		si_pm4_bind_state(rctx, vs, sel->current->pm4);
2468	else
2469		si_pm4_bind_state(rctx, vs, rctx->dummy_pixel_shader->pm4);
2470}
2471
2472static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
2473{
2474	struct r600_context *rctx = (struct r600_context *)ctx;
2475	struct si_pipe_shader_selector *sel = state;
2476
2477	if (rctx->ps_shader == sel)
2478		return;
2479
2480	rctx->ps_shader = sel;
2481
2482	if (sel && sel->current)
2483		si_pm4_bind_state(rctx, ps, sel->current->pm4);
2484	else
2485		si_pm4_bind_state(rctx, ps, rctx->dummy_pixel_shader->pm4);
2486}
2487
2488static void si_delete_shader_selector(struct pipe_context *ctx,
2489				      struct si_pipe_shader_selector *sel)
2490{
2491	struct r600_context *rctx = (struct r600_context *)ctx;
2492	struct si_pipe_shader *p = sel->current, *c;
2493
2494	while (p) {
2495		c = p->next_variant;
2496		si_pm4_delete_state(rctx, vs, p->pm4);
2497		si_pipe_shader_destroy(ctx, p);
2498		free(p);
2499		p = c;
2500	}
2501
2502	free(sel->tokens);
2503	free(sel);
2504 }
2505
2506static void si_delete_vs_shader(struct pipe_context *ctx, void *state)
2507{
2508	struct r600_context *rctx = (struct r600_context *)ctx;
2509	struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
2510
2511	if (rctx->vs_shader == sel) {
2512		rctx->vs_shader = NULL;
2513	}
2514
2515	si_delete_shader_selector(ctx, sel);
2516}
2517
2518static void si_delete_ps_shader(struct pipe_context *ctx, void *state)
2519{
2520	struct r600_context *rctx = (struct r600_context *)ctx;
2521	struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
2522
2523	if (rctx->ps_shader == sel) {
2524		rctx->ps_shader = NULL;
2525	}
2526
2527	si_delete_shader_selector(ctx, sel);
2528}
2529
2530/*
2531 * Samplers
2532 */
2533
2534static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx,
2535							struct pipe_resource *texture,
2536							const struct pipe_sampler_view *state)
2537{
2538	struct si_pipe_sampler_view *view = CALLOC_STRUCT(si_pipe_sampler_view);
2539	struct r600_texture *tmp = (struct r600_texture*)texture;
2540	const struct util_format_description *desc;
2541	unsigned format, num_format;
2542	uint32_t pitch = 0;
2543	unsigned char state_swizzle[4], swizzle[4];
2544	unsigned height, depth, width;
2545	enum pipe_format pipe_format = state->format;
2546	struct radeon_surface_level *surflevel;
2547	int first_non_void;
2548	uint64_t va;
2549
2550	if (view == NULL)
2551		return NULL;
2552
2553	/* initialize base object */
2554	view->base = *state;
2555	view->base.texture = NULL;
2556	pipe_reference(NULL, &texture->reference);
2557	view->base.texture = texture;
2558	view->base.reference.count = 1;
2559	view->base.context = ctx;
2560
2561	state_swizzle[0] = state->swizzle_r;
2562	state_swizzle[1] = state->swizzle_g;
2563	state_swizzle[2] = state->swizzle_b;
2564	state_swizzle[3] = state->swizzle_a;
2565
2566	surflevel = tmp->surface.level;
2567
2568	/* Texturing with separate depth and stencil. */
2569	if (tmp->is_depth && !tmp->is_flushing_texture) {
2570		switch (pipe_format) {
2571		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2572			pipe_format = PIPE_FORMAT_Z32_FLOAT;
2573			break;
2574		case PIPE_FORMAT_X8Z24_UNORM:
2575		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2576			/* Z24 is always stored like this. */
2577			pipe_format = PIPE_FORMAT_Z24X8_UNORM;
2578			break;
2579		case PIPE_FORMAT_X24S8_UINT:
2580		case PIPE_FORMAT_S8X24_UINT:
2581		case PIPE_FORMAT_X32_S8X24_UINT:
2582			pipe_format = PIPE_FORMAT_S8_UINT;
2583			surflevel = tmp->surface.stencil_level;
2584			break;
2585		default:;
2586		}
2587	}
2588
2589	desc = util_format_description(pipe_format);
2590
2591	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
2592		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
2593		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
2594
2595		switch (pipe_format) {
2596		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2597		case PIPE_FORMAT_X24S8_UINT:
2598		case PIPE_FORMAT_X32_S8X24_UINT:
2599		case PIPE_FORMAT_X8Z24_UNORM:
2600			util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
2601			break;
2602		default:
2603			util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
2604		}
2605	} else {
2606		util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
2607	}
2608
2609	first_non_void = util_format_get_first_non_void_channel(pipe_format);
2610
2611	switch (pipe_format) {
2612	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2613		num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2614		break;
2615	default:
2616		if (first_non_void < 0) {
2617			if (util_format_is_compressed(pipe_format)) {
2618				switch (pipe_format) {
2619				case PIPE_FORMAT_DXT1_SRGB:
2620				case PIPE_FORMAT_DXT1_SRGBA:
2621				case PIPE_FORMAT_DXT3_SRGBA:
2622				case PIPE_FORMAT_DXT5_SRGBA:
2623					num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2624					break;
2625				case PIPE_FORMAT_RGTC1_SNORM:
2626				case PIPE_FORMAT_LATC1_SNORM:
2627				case PIPE_FORMAT_RGTC2_SNORM:
2628				case PIPE_FORMAT_LATC2_SNORM:
2629					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2630					break;
2631				default:
2632					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2633					break;
2634				}
2635			} else {
2636				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2637			}
2638		} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
2639			num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2640		} else {
2641			num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2642
2643			switch (desc->channel[first_non_void].type) {
2644			case UTIL_FORMAT_TYPE_FLOAT:
2645				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2646				break;
2647			case UTIL_FORMAT_TYPE_SIGNED:
2648				if (desc->channel[first_non_void].normalized)
2649					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2650				else if (desc->channel[first_non_void].pure_integer)
2651					num_format = V_008F14_IMG_NUM_FORMAT_SINT;
2652				else
2653					num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
2654				break;
2655			case UTIL_FORMAT_TYPE_UNSIGNED:
2656				if (desc->channel[first_non_void].normalized)
2657					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2658				else if (desc->channel[first_non_void].pure_integer)
2659					num_format = V_008F14_IMG_NUM_FORMAT_UINT;
2660				else
2661					num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
2662			}
2663		}
2664	}
2665
2666	format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void);
2667	if (format == ~0) {
2668		format = 0;
2669	}
2670
2671	view->resource = &tmp->resource;
2672
2673	/* not supported any more */
2674	//endian = si_colorformat_endian_swap(format);
2675
2676	width = surflevel[0].npix_x;
2677	height = surflevel[0].npix_y;
2678	depth = surflevel[0].npix_z;
2679	pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format);
2680
2681	if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
2682	        height = 1;
2683		depth = texture->array_size;
2684	} else if (texture->target == PIPE_TEXTURE_2D_ARRAY) {
2685		depth = texture->array_size;
2686	}
2687
2688	va = r600_resource_va(ctx->screen, texture);
2689	va += surflevel[0].offset;
2690	view->state[0] = va >> 8;
2691	view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) |
2692			  S_008F14_DATA_FORMAT(format) |
2693			  S_008F14_NUM_FORMAT(num_format));
2694	view->state[2] = (S_008F18_WIDTH(width - 1) |
2695			  S_008F18_HEIGHT(height - 1));
2696	view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
2697			  S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
2698			  S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
2699			  S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
2700			  S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ?
2701						      0 : state->u.tex.first_level) |
2702			  S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ?
2703						      util_logbase2(texture->nr_samples) :
2704						      state->u.tex.last_level) |
2705			  S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, 0, false)) |
2706			  S_008F1C_POW2_PAD(texture->last_level > 0) |
2707			  S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples)));
2708	view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
2709	view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
2710			  S_008F24_LAST_ARRAY(state->u.tex.last_layer));
2711	view->state[6] = 0;
2712	view->state[7] = 0;
2713
2714	/* Initialize the sampler view for FMASK. */
2715	if (tmp->fmask.size) {
2716		uint64_t va = r600_resource_va(ctx->screen, texture) + tmp->fmask.offset;
2717		uint32_t fmask_format;
2718
2719		switch (texture->nr_samples) {
2720		case 2:
2721			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
2722			break;
2723		case 4:
2724			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
2725			break;
2726		case 8:
2727			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
2728			break;
2729		default:
2730			assert(0);
2731		}
2732
2733		view->fmask_state[0] = va >> 8;
2734		view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
2735				       S_008F14_DATA_FORMAT(fmask_format) |
2736				       S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
2737		view->fmask_state[2] = S_008F18_WIDTH(width - 1) |
2738				       S_008F18_HEIGHT(height - 1);
2739		view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
2740				       S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
2741				       S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
2742				       S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
2743				       S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) |
2744				       S_008F1C_TYPE(si_tex_dim(texture->target, 0));
2745		view->fmask_state[4] = S_008F20_PITCH(tmp->fmask.pitch - 1);
2746		view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
2747				       S_008F24_LAST_ARRAY(state->u.tex.last_layer);
2748		view->fmask_state[6] = 0;
2749		view->fmask_state[7] = 0;
2750	}
2751
2752	return &view->base;
2753}
2754
2755static void si_sampler_view_destroy(struct pipe_context *ctx,
2756				    struct pipe_sampler_view *state)
2757{
2758	struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state;
2759
2760	pipe_resource_reference(&state->texture, NULL);
2761	FREE(resource);
2762}
2763
2764static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
2765{
2766	return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
2767	       wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
2768	       (linear_filter &&
2769	        (wrap == PIPE_TEX_WRAP_CLAMP ||
2770		 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
2771}
2772
2773static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
2774{
2775	bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
2776			     state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
2777
2778	return (state->border_color.ui[0] || state->border_color.ui[1] ||
2779		state->border_color.ui[2] || state->border_color.ui[3]) &&
2780	       (wrap_mode_uses_border_color(state->wrap_s, linear_filter) ||
2781		wrap_mode_uses_border_color(state->wrap_t, linear_filter) ||
2782		wrap_mode_uses_border_color(state->wrap_r, linear_filter));
2783}
2784
2785static void *si_create_sampler_state(struct pipe_context *ctx,
2786				     const struct pipe_sampler_state *state)
2787{
2788	struct si_pipe_sampler_state *rstate = CALLOC_STRUCT(si_pipe_sampler_state);
2789	unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
2790	unsigned border_color_type;
2791
2792	if (rstate == NULL) {
2793		return NULL;
2794	}
2795
2796	if (sampler_state_needs_border_color(state))
2797		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
2798	else
2799		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2800
2801	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
2802			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
2803			  S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
2804			  (state->max_anisotropy & 0x7) << 9 | /* XXX */
2805			  S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
2806			  S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
2807			  aniso_flag_offset << 16 | /* XXX */
2808			  S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map));
2809	rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
2810			  S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)));
2811	rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
2812			  S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter)) |
2813			  S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter)) |
2814			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
2815	rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);
2816
2817	if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
2818		memcpy(rstate->border_color, state->border_color.ui,
2819		       sizeof(rstate->border_color));
2820	}
2821
2822	return rstate;
2823}
2824
2825/* XXX consider moving this function to si_descriptors.c for gcc to inline
2826 *     the si_set_sampler_view calls. LTO might help too. */
2827static struct si_pm4_state *si_set_sampler_views(struct r600_context *rctx,
2828						 unsigned shader, unsigned count,
2829						 struct pipe_sampler_view **views)
2830{
2831	struct r600_textures_info *samplers = &rctx->samplers[shader];
2832	struct si_pipe_sampler_view **rviews = (struct si_pipe_sampler_view **)views;
2833	struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
2834	int i;
2835
2836	si_pm4_inval_texture_cache(pm4);
2837
2838	for (i = 0; i < count; i++) {
2839		if (views[i]) {
2840			struct r600_texture *rtex =
2841				(struct r600_texture*)views[i]->texture;
2842
2843			if (rtex->is_depth && !rtex->is_flushing_texture) {
2844				samplers->depth_texture_mask |= 1 << i;
2845			} else {
2846				samplers->depth_texture_mask &= ~(1 << i);
2847			}
2848			if (rtex->cmask.size || rtex->fmask.size) {
2849				samplers->compressed_colortex_mask |= 1 << i;
2850			} else {
2851				samplers->compressed_colortex_mask &= ~(1 << i);
2852			}
2853
2854			si_set_sampler_view(rctx, shader, i, views[i], rviews[i]->state);
2855
2856			if (rtex->fmask.size) {
2857				si_set_sampler_view(rctx, shader, FMASK_TEX_OFFSET + i,
2858						    views[i], rviews[i]->fmask_state);
2859			} else {
2860				si_set_sampler_view(rctx, shader, FMASK_TEX_OFFSET + i,
2861						    NULL, NULL);
2862			}
2863		} else {
2864			samplers->depth_texture_mask &= ~(1 << i);
2865			samplers->compressed_colortex_mask &= ~(1 << i);
2866			si_set_sampler_view(rctx, shader, i, NULL, NULL);
2867			si_set_sampler_view(rctx, shader, FMASK_TEX_OFFSET + i,
2868					    NULL, NULL);
2869		}
2870	}
2871	for (; i < samplers->n_views; i++) {
2872		samplers->depth_texture_mask &= ~(1 << i);
2873		samplers->compressed_colortex_mask &= ~(1 << i);
2874		si_set_sampler_view(rctx, shader, i, NULL, NULL);
2875		si_set_sampler_view(rctx, shader, FMASK_TEX_OFFSET + i,
2876				    NULL, NULL);
2877	}
2878
2879	samplers->n_views = count;
2880	return pm4;
2881}
2882
2883static void si_set_vs_sampler_views(struct pipe_context *ctx, unsigned count,
2884				    struct pipe_sampler_view **views)
2885{
2886	struct r600_context *rctx = (struct r600_context *)ctx;
2887	struct si_pm4_state *pm4;
2888
2889	pm4 = si_set_sampler_views(rctx, PIPE_SHADER_VERTEX, count, views);
2890	si_pm4_set_state(rctx, vs_sampler_views, pm4);
2891}
2892
2893static void si_set_ps_sampler_views(struct pipe_context *ctx, unsigned count,
2894				    struct pipe_sampler_view **views)
2895{
2896	struct r600_context *rctx = (struct r600_context *)ctx;
2897	struct si_pm4_state *pm4;
2898
2899	pm4 = si_set_sampler_views(rctx, PIPE_SHADER_FRAGMENT, count, views);
2900	si_pm4_set_state(rctx, ps_sampler_views, pm4);
2901}
2902
2903static struct si_pm4_state *si_bind_sampler_states(struct r600_context *rctx, unsigned count,
2904						   void **states,
2905						   struct r600_textures_info *samplers,
2906						   unsigned user_data_reg)
2907{
2908	struct si_pipe_sampler_state **rstates = (struct si_pipe_sampler_state **)states;
2909	struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
2910	uint32_t *border_color_table = NULL;
2911	int i, j;
2912
2913	if (!count)
2914		goto out;
2915
2916	si_pm4_inval_texture_cache(pm4);
2917
2918	si_pm4_sh_data_begin(pm4);
2919	for (i = 0; i < count; i++) {
2920		if (rstates[i] &&
2921		    G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) ==
2922		    V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
2923			if (!rctx->border_color_table ||
2924			    ((rctx->border_color_offset + count - i) &
2925			     C_008F3C_BORDER_COLOR_PTR)) {
2926				si_resource_reference(&rctx->border_color_table, NULL);
2927				rctx->border_color_offset = 0;
2928
2929				rctx->border_color_table =
2930					si_resource_create_custom(&rctx->screen->screen,
2931								  PIPE_USAGE_STAGING,
2932								  4096 * 4 * 4);
2933			}
2934
2935			if (!border_color_table) {
2936			        border_color_table =
2937					rctx->ws->buffer_map(rctx->border_color_table->cs_buf,
2938							     rctx->cs,
2939							     PIPE_TRANSFER_WRITE |
2940							     PIPE_TRANSFER_UNSYNCHRONIZED);
2941			}
2942
2943			for (j = 0; j < 4; j++) {
2944				border_color_table[4 * rctx->border_color_offset + j] =
2945					util_le32_to_cpu(rstates[i]->border_color[j]);
2946			}
2947
2948			rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR;
2949			rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(rctx->border_color_offset++);
2950		}
2951
2952		for (j = 0; j < Elements(rstates[i]->val); ++j) {
2953			si_pm4_sh_data_add(pm4, rstates[i] ? rstates[i]->val[j] : 0);
2954		}
2955	}
2956	si_pm4_sh_data_end(pm4, user_data_reg, SI_SGPR_SAMPLER);
2957
2958	if (border_color_table) {
2959		uint64_t va_offset =
2960			r600_resource_va(&rctx->screen->screen,
2961					 (void*)rctx->border_color_table);
2962
2963		si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8);
2964		if (rctx->chip_class >= CIK)
2965			si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40);
2966		rctx->ws->buffer_unmap(rctx->border_color_table->cs_buf);
2967		si_pm4_add_bo(pm4, rctx->border_color_table, RADEON_USAGE_READ);
2968	}
2969
2970	memcpy(samplers->samplers, states, sizeof(void*) * count);
2971
2972out:
2973	samplers->n_samplers = count;
2974	return pm4;
2975}
2976
2977static void si_bind_vs_sampler_states(struct pipe_context *ctx, unsigned count, void **states)
2978{
2979	struct r600_context *rctx = (struct r600_context *)ctx;
2980	struct si_pm4_state *pm4;
2981
2982	pm4 = si_bind_sampler_states(rctx, count, states, &rctx->samplers[PIPE_SHADER_VERTEX],
2983			      R_00B130_SPI_SHADER_USER_DATA_VS_0);
2984	si_pm4_set_state(rctx, vs_sampler, pm4);
2985}
2986
2987static void si_bind_ps_sampler_states(struct pipe_context *ctx, unsigned count, void **states)
2988{
2989	struct r600_context *rctx = (struct r600_context *)ctx;
2990	struct si_pm4_state *pm4;
2991
2992	pm4 = si_bind_sampler_states(rctx, count, states, &rctx->samplers[PIPE_SHADER_FRAGMENT],
2993			      R_00B030_SPI_SHADER_USER_DATA_PS_0);
2994	si_pm4_set_state(rctx, ps_sampler, pm4);
2995}
2996
2997static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
2998{
2999	struct r600_context *rctx = (struct r600_context *)ctx;
3000	struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
3001	uint16_t mask = sample_mask;
3002
3003        if (pm4 == NULL)
3004                return;
3005
3006	si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16));
3007	si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16));
3008
3009	si_pm4_set_state(rctx, sample_mask, pm4);
3010}
3011
/* pipe_context hook: destroy a sampler state object.
 *
 * Sampler states are allocated with CALLOC_STRUCT in
 * si_create_sampler_state(), so they are released with the matching FREE()
 * wrapper, as the other destructors in this file do. */
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
{
	FREE(state);
}
3016
3017/*
3018 * Constants
3019 */
3020static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
3021				   struct pipe_constant_buffer *input)
3022{
3023	struct r600_context *rctx = (struct r600_context *)ctx;
3024	struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
3025	struct pipe_constant_buffer *cb;
3026	const uint8_t *ptr;
3027
3028	/* Note that the state tracker can unbind constant buffers by
3029	 * passing NULL here.
3030	 */
3031	if (unlikely(!input || (!input->buffer && !input->user_buffer))) {
3032		state->enabled_mask &= ~(1 << index);
3033		state->dirty_mask &= ~(1 << index);
3034		pipe_resource_reference(&state->cb[index].buffer, NULL);
3035		return;
3036	}
3037
3038	cb = &state->cb[index];
3039	cb->buffer_size = input->buffer_size;
3040
3041	ptr = input->user_buffer;
3042
3043	if (ptr) {
3044		r600_upload_const_buffer(rctx,
3045				(struct si_resource**)&cb->buffer, ptr,
3046				cb->buffer_size, &cb->buffer_offset);
3047	} else {
3048		/* Setup the hw buffer. */
3049		cb->buffer_offset = input->buffer_offset;
3050		pipe_resource_reference(&cb->buffer, input->buffer);
3051	}
3052
3053	state->enabled_mask |= 1 << index;
3054	state->dirty_mask |= 1 << index;
3055}
3056
3057/*
3058 * Vertex elements & buffers
3059 */
3060
3061static void *si_create_vertex_elements(struct pipe_context *ctx,
3062				       unsigned count,
3063				       const struct pipe_vertex_element *elements)
3064{
3065	struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
3066	int i;
3067
3068	assert(count < PIPE_MAX_ATTRIBS);
3069	if (!v)
3070		return NULL;
3071
3072	v->count = count;
3073	for (i = 0; i < count; ++i) {
3074		const struct util_format_description *desc;
3075		unsigned data_format, num_format;
3076		int first_non_void;
3077
3078		desc = util_format_description(elements[i].src_format);
3079		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
3080		data_format = si_translate_vertexformat(ctx->screen, elements[i].src_format,
3081							desc, first_non_void);
3082
3083		switch (desc->channel[first_non_void].type) {
3084		case UTIL_FORMAT_TYPE_FIXED:
3085			num_format = V_008F0C_BUF_NUM_FORMAT_USCALED; /* XXX */
3086			break;
3087		case UTIL_FORMAT_TYPE_SIGNED:
3088			if (desc->channel[first_non_void].normalized)
3089				num_format = V_008F0C_BUF_NUM_FORMAT_SNORM;
3090			else if (desc->channel[first_non_void].pure_integer)
3091				num_format = V_008F0C_BUF_NUM_FORMAT_SINT;
3092			else
3093				num_format = V_008F0C_BUF_NUM_FORMAT_SSCALED;
3094			break;
3095		case UTIL_FORMAT_TYPE_UNSIGNED:
3096			if (desc->channel[first_non_void].normalized)
3097				num_format = V_008F0C_BUF_NUM_FORMAT_UNORM;
3098			else if (desc->channel[first_non_void].pure_integer)
3099				num_format = V_008F0C_BUF_NUM_FORMAT_UINT;
3100			else
3101				num_format = V_008F0C_BUF_NUM_FORMAT_USCALED;
3102			break;
3103		case UTIL_FORMAT_TYPE_FLOAT:
3104		default:
3105			num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
3106		}
3107
3108		v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
3109				   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
3110				   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
3111				   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
3112				   S_008F0C_NUM_FORMAT(num_format) |
3113				   S_008F0C_DATA_FORMAT(data_format);
3114	}
3115	memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
3116
3117	return v;
3118}
3119
3120static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
3121{
3122	struct r600_context *rctx = (struct r600_context *)ctx;
3123	struct si_vertex_element *v = (struct si_vertex_element*)state;
3124
3125	rctx->vertex_elements = v;
3126}
3127
3128static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
3129{
3130	struct r600_context *rctx = (struct r600_context *)ctx;
3131
3132	if (rctx->vertex_elements == state)
3133		rctx->vertex_elements = NULL;
3134	FREE(state);
3135}
3136
3137static void si_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot, unsigned count,
3138				  const struct pipe_vertex_buffer *buffers)
3139{
3140	struct r600_context *rctx = (struct r600_context *)ctx;
3141
3142	util_set_vertex_buffers_count(rctx->vertex_buffer, &rctx->nr_vertex_buffers, buffers, start_slot, count);
3143}
3144
3145static void si_set_index_buffer(struct pipe_context *ctx,
3146				const struct pipe_index_buffer *ib)
3147{
3148	struct r600_context *rctx = (struct r600_context *)ctx;
3149
3150	if (ib) {
3151		pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
3152	        memcpy(&rctx->index_buffer, ib, sizeof(*ib));
3153	} else {
3154		pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
3155	}
3156}
3157
3158/*
3159 * Misc
3160 */
/* pipe_context hook for polygon stipple.  Nothing is programmed here: both
 * arguments are ignored. */
static void si_set_polygon_stipple(struct pipe_context *ctx,
				   const struct pipe_poly_stipple *state)
{
	(void)ctx;
	(void)state;
}
3165
3166static void si_texture_barrier(struct pipe_context *ctx)
3167{
3168	struct r600_context *rctx = (struct r600_context *)ctx;
3169	struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
3170
3171	if (pm4 == NULL)
3172		return;
3173
3174	si_pm4_inval_texture_cache(pm4);
3175	si_pm4_inval_fb_cache(pm4, rctx->framebuffer.nr_cbufs);
3176	si_pm4_set_state(rctx, texture_barrier, pm4);
3177}
3178
3179static void *si_create_blend_custom(struct r600_context *rctx, unsigned mode)
3180{
3181	struct pipe_blend_state blend;
3182
3183	memset(&blend, 0, sizeof(blend));
3184	blend.independent_blend_enable = true;
3185	blend.rt[0].colormask = 0xf;
3186	return si_create_blend_state_mode(&rctx->context, &blend, mode);
3187}
3188
3189void si_init_state_functions(struct r600_context *rctx)
3190{
3191	int i;
3192
3193	rctx->context.create_blend_state = si_create_blend_state;
3194	rctx->context.bind_blend_state = si_bind_blend_state;
3195	rctx->context.delete_blend_state = si_delete_blend_state;
3196	rctx->context.set_blend_color = si_set_blend_color;
3197
3198	rctx->context.create_rasterizer_state = si_create_rs_state;
3199	rctx->context.bind_rasterizer_state = si_bind_rs_state;
3200	rctx->context.delete_rasterizer_state = si_delete_rs_state;
3201
3202	rctx->context.create_depth_stencil_alpha_state = si_create_dsa_state;
3203	rctx->context.bind_depth_stencil_alpha_state = si_bind_dsa_state;
3204	rctx->context.delete_depth_stencil_alpha_state = si_delete_dsa_state;
3205
3206	for (i = 0; i < 8; i++) {
3207		rctx->custom_dsa_flush_depth_stencil[i] = si_create_db_flush_dsa(rctx, true, true, i);
3208		rctx->custom_dsa_flush_depth[i] = si_create_db_flush_dsa(rctx, true, false, i);
3209		rctx->custom_dsa_flush_stencil[i] = si_create_db_flush_dsa(rctx, false, true, i);
3210	}
3211	rctx->custom_dsa_flush_inplace = si_create_db_flush_dsa(rctx, false, false, 0);
3212	rctx->custom_blend_resolve = si_create_blend_custom(rctx, V_028808_CB_RESOLVE);
3213	rctx->custom_blend_decompress = si_create_blend_custom(rctx, V_028808_CB_FMASK_DECOMPRESS);
3214
3215	rctx->context.set_clip_state = si_set_clip_state;
3216	rctx->context.set_scissor_states = si_set_scissor_states;
3217	rctx->context.set_viewport_states = si_set_viewport_states;
3218	rctx->context.set_stencil_ref = si_set_pipe_stencil_ref;
3219
3220	rctx->context.set_framebuffer_state = si_set_framebuffer_state;
3221	rctx->context.get_sample_position = si_get_sample_position;
3222
3223	rctx->context.create_vs_state = si_create_vs_state;
3224	rctx->context.create_fs_state = si_create_fs_state;
3225	rctx->context.bind_vs_state = si_bind_vs_shader;
3226	rctx->context.bind_fs_state = si_bind_ps_shader;
3227	rctx->context.delete_vs_state = si_delete_vs_shader;
3228	rctx->context.delete_fs_state = si_delete_ps_shader;
3229
3230	rctx->context.create_sampler_state = si_create_sampler_state;
3231	rctx->context.bind_vertex_sampler_states = si_bind_vs_sampler_states;
3232	rctx->context.bind_fragment_sampler_states = si_bind_ps_sampler_states;
3233	rctx->context.delete_sampler_state = si_delete_sampler_state;
3234
3235	rctx->context.create_sampler_view = si_create_sampler_view;
3236	rctx->context.set_vertex_sampler_views = si_set_vs_sampler_views;
3237	rctx->context.set_fragment_sampler_views = si_set_ps_sampler_views;
3238	rctx->context.sampler_view_destroy = si_sampler_view_destroy;
3239
3240	rctx->context.set_sample_mask = si_set_sample_mask;
3241
3242	rctx->context.set_constant_buffer = si_set_constant_buffer;
3243
3244	rctx->context.create_vertex_elements_state = si_create_vertex_elements;
3245	rctx->context.bind_vertex_elements_state = si_bind_vertex_elements;
3246	rctx->context.delete_vertex_elements_state = si_delete_vertex_element;
3247	rctx->context.set_vertex_buffers = si_set_vertex_buffers;
3248	rctx->context.set_index_buffer = si_set_index_buffer;
3249
3250	rctx->context.create_stream_output_target = si_create_so_target;
3251	rctx->context.stream_output_target_destroy = si_so_target_destroy;
3252	rctx->context.set_stream_output_targets = si_set_so_targets;
3253
3254	rctx->context.texture_barrier = si_texture_barrier;
3255	rctx->context.set_polygon_stipple = si_set_polygon_stipple;
3256
3257	rctx->context.draw_vbo = si_draw_vbo;
3258}
3259
3260void si_init_config(struct r600_context *rctx)
3261{
3262	struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
3263
3264	if (pm4 == NULL)
3265		return;
3266
3267	si_cmd_context_control(pm4);
3268
3269	si_pm4_set_reg(pm4, R_028A4C_PA_SC_MODE_CNTL_1, 0x0);
3270
3271	si_pm4_set_reg(pm4, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0);
3272	si_pm4_set_reg(pm4, R_028A14_VGT_HOS_CNTL, 0x0);
3273	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0);
3274	si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0);
3275	si_pm4_set_reg(pm4, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0);
3276	si_pm4_set_reg(pm4, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0);
3277	si_pm4_set_reg(pm4, R_028A28_VGT_GROUP_FIRST_DECR, 0x0);
3278	si_pm4_set_reg(pm4, R_028A2C_VGT_GROUP_DECR, 0x0);
3279	si_pm4_set_reg(pm4, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0);
3280	si_pm4_set_reg(pm4, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0);
3281	si_pm4_set_reg(pm4, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0);
3282	si_pm4_set_reg(pm4, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0);
3283	si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, 0x0);
3284	si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0x0);
3285	si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
3286	si_pm4_set_reg(pm4, R_028B94_VGT_STRMOUT_CONFIG, 0x0);
3287	si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
3288	si_pm4_set_reg(pm4, R_028AA8_IA_MULTI_VGT_PARAM,
3289		       S_028AA8_SWITCH_ON_EOP(1) |
3290		       S_028AA8_PARTIAL_VS_WAVE_ON(1) |
3291		       S_028AA8_PRIMGROUP_SIZE(63));
3292	si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0x00000000);
3293	si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
3294	if (rctx->chip_class < CIK)
3295		si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
3296			       S_008A14_CLIP_VTX_REORDER_ENA(1));
3297
3298	si_pm4_set_reg(pm4, R_028B54_VGT_SHADER_STAGES_EN, 0);
3299	si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
3300	si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
3301
3302	si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
3303
3304	if (rctx->chip_class >= CIK) {
3305		switch (rctx->screen->family) {
3306		case CHIP_BONAIRE:
3307			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012);
3308			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
3309			break;
3310		case CHIP_KAVERI:
3311			/* XXX todo */
3312		case CHIP_KABINI:
3313			/* XXX todo */
3314		default:
3315			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
3316			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
3317			break;
3318		}
3319	} else {
3320		switch (rctx->screen->family) {
3321		case CHIP_TAHITI:
3322		case CHIP_PITCAIRN:
3323			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x2a00126a);
3324			break;
3325		case CHIP_VERDE:
3326			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x0000124a);
3327			break;
3328		case CHIP_OLAND:
3329			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000082);
3330			break;
3331		case CHIP_HAINAN:
3332			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
3333			break;
3334		default:
3335			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
3336			break;
3337		}
3338	}
3339
3340	si_pm4_set_state(rctx, init, pm4);
3341}
3342