evergreen_compute_internal.c revision 0c4b19ac63efa41242c515824301e6161aceeea5
1/*
2 * Permission is hereby granted, free of charge, to any person obtaining a
3 * copy of this software and associated documentation files (the "Software"),
4 * to deal in the Software without restriction, including without limitation
5 * on the rights to use, copy, modify, merge, publish, distribute, sub
6 * license, and/or sell copies of the Software, and to permit persons to whom
7 * the Software is furnished to do so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice (including the next
10 * paragraph) shall be included in all copies or substantial portions of the
11 * Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * Authors:
22 *      Adam Rak <adam.rak@streamnovation.com>
23 */
24
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600.h"
#include "r600_resource.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "evergreend.h"
#include "evergreen_compute_internal.h"
#include "r600_hw_context_priv.h"
47
48int get_compute_resource_num(void)
49{
50	int num = 0;
51#define DECL_COMPUTE_RESOURCE(name, n) num += n;
52#include "compute_resource.def"
53#undef DECL_COMPUTE_RESOURCE
54	return num;
55}
56
57void evergreen_emit_raw_value(
58	struct evergreen_compute_resource* res,
59	unsigned value)
60{
61	res->cs[res->cs_end++] = value;
62}
63
64void evergreen_emit_ctx_value(struct r600_context *ctx, unsigned value)
65{
66	ctx->cs->buf[ctx->cs->cdw++] = value;
67}
68
69void evergreen_mult_reg_set_(
70	struct evergreen_compute_resource* res,
71	int index,
72	u32* array,
73	int size)
74{
75	int i = 0;
76
77	evergreen_emit_raw_reg_set(res, index, size / 4);
78
79	for (i = 0; i < size; i+=4) {
80		res->cs[res->cs_end++] = array[i / 4];
81	}
82}
83
/**
 * Emit a single-register write into a resource's command buffer.
 *
 * \param res    resource whose command buffer is appended to
 * \param index  register address
 * \param value  dword written after the register-set header
 */
void evergreen_reg_set(
	struct evergreen_compute_resource* res,
	unsigned index,
	unsigned value)
{
	/* Header announcing exactly one value ... */
	evergreen_emit_raw_reg_set(res, index, 1);
	/* ... immediately followed by that value. */
	evergreen_emit_raw_value(res, value);
}
92
93struct evergreen_compute_resource* get_empty_res(
94	struct r600_pipe_compute* pipe,
95	enum evergreen_compute_resources res_code,
96	int offset_index)
97{
98	int code_index = -1;
99	int code_size = -1;
100
101	{
102		int i = 0;
103		#define DECL_COMPUTE_RESOURCE(name, n) if (COMPUTE_RESOURCE_ ## name	== res_code) {code_index = i; code_size = n;} i += n;
104		#include "compute_resource.def"
105		#undef DECL_COMPUTE_RESOURCE
106	}
107
108	assert(code_index != -1 && "internal error: resouce index not found");
109	assert(offset_index < code_size && "internal error: overindexing resource");
110
111	int index = code_index + offset_index;
112
113	struct evergreen_compute_resource* res = &pipe->resources[index];
114
115	res->enabled = true;
116	res->bo = NULL;
117	res->cs_end = 0;
118	bzero(&res->do_reloc, sizeof(res->do_reloc));
119
120	return res;
121}
122
123void evergreen_emit_raw_reg_set(
124	struct evergreen_compute_resource* res,
125	unsigned index,
126	int num)
127{
128	res->enabled = 1;
129	int cs_end = res->cs_end;
130
131	if (index >= EVERGREEN_CONFIG_REG_OFFSET
132			&& index < EVERGREEN_CONFIG_REG_END) {
133		res->cs[cs_end] = PKT3C(PKT3_SET_CONFIG_REG, num, 0);
134		res->cs[cs_end+1] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
135	} else if (index >= EVERGREEN_CONTEXT_REG_OFFSET
136			&& index < EVERGREEN_CONTEXT_REG_END) {
137		res->cs[cs_end] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0);
138		res->cs[cs_end+1] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
139	} else if (index >= EVERGREEN_RESOURCE_OFFSET
140			&& index < EVERGREEN_RESOURCE_END) {
141		res->cs[cs_end] = PKT3C(PKT3_SET_RESOURCE, num, 0);
142		res->cs[cs_end+1] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2;
143	} else if (index >= EVERGREEN_SAMPLER_OFFSET
144			&& index < EVERGREEN_SAMPLER_END) {
145		res->cs[cs_end] = PKT3C(PKT3_SET_SAMPLER, num, 0);
146		res->cs[cs_end+1] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2;
147	} else if (index >= EVERGREEN_CTL_CONST_OFFSET
148			&& index < EVERGREEN_CTL_CONST_END) {
149		res->cs[cs_end] = PKT3C(PKT3_SET_CTL_CONST, num, 0);
150		res->cs[cs_end+1] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2;
151	} else if (index >= EVERGREEN_LOOP_CONST_OFFSET
152			&& index < EVERGREEN_LOOP_CONST_END) {
153		res->cs[cs_end] = PKT3C(PKT3_SET_LOOP_CONST, num, 0);
154		res->cs[cs_end+1] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2;
155	} else if (index >= EVERGREEN_BOOL_CONST_OFFSET
156			&& index < EVERGREEN_BOOL_CONST_END) {
157		res->cs[cs_end] = PKT3C(PKT3_SET_BOOL_CONST, num, 0);
158		res->cs[cs_end+1] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2;
159	} else {
160		res->cs[cs_end] = PKT0(index, num-1);
161		res->cs_end--;
162	}
163
164	res->cs_end += 2;
165}
166
167void evergreen_emit_force_reloc(struct evergreen_compute_resource* res)
168{
169	res->do_reloc[res->cs_end] += 1;
170}
171
172void evergreen_emit_ctx_reg_set(
173	struct r600_context *ctx,
174	unsigned index,
175	int num)
176{
177
178	if (index >= EVERGREEN_CONFIG_REG_OFFSET
179			&& index < EVERGREEN_CONFIG_REG_END) {
180		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONFIG_REG, num, 0);
181		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
182	} else if (index >= EVERGREEN_CONTEXT_REG_OFFSET
183			&& index < EVERGREEN_CONTEXT_REG_END) {
184		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0);
185		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
186	} else if (index >= EVERGREEN_RESOURCE_OFFSET
187			&& index < EVERGREEN_RESOURCE_END) {
188		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_RESOURCE, num, 0);
189		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2;
190	} else if (index >= EVERGREEN_SAMPLER_OFFSET
191			&& index < EVERGREEN_SAMPLER_END) {
192		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_SAMPLER, num, 0);
193		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2;
194	} else if (index >= EVERGREEN_CTL_CONST_OFFSET
195			&& index < EVERGREEN_CTL_CONST_END) {
196		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CTL_CONST, num, 0);
197		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2;
198	} else if (index >= EVERGREEN_LOOP_CONST_OFFSET
199			&& index < EVERGREEN_LOOP_CONST_END) {
200		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_LOOP_CONST, num, 0);
201		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2;
202	} else if (index >= EVERGREEN_BOOL_CONST_OFFSET
203			&& index < EVERGREEN_BOOL_CONST_END) {
204		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_BOOL_CONST, num, 0);
205		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2;
206	} else {
207		ctx->cs->buf[ctx->cs->cdw++] = PKT0(index, num-1);
208	}
209}
210
211void evergreen_emit_ctx_reloc(
212	struct r600_context *ctx,
213	struct r600_resource *bo,
214	enum radeon_bo_usage usage)
215{
216	assert(bo);
217
218	ctx->cs->buf[ctx->cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
219	u32 rr = r600_context_bo_reloc(ctx, bo, usage);
220	ctx->cs->buf[ctx->cs->cdw++] = rr;
221}
222
223void evergreen_set_buffer_sync(
224	struct r600_context *ctx,
225	struct r600_resource* bo,
226	int size,
227	int flags,
228	enum radeon_bo_usage usage)
229{
230	assert(bo);
231	int32_t cp_coher_size = 0;
232
233	if (size == 0xffffffff || size == 0) {
234		cp_coher_size = 0xffffffff;
235	}
236	else {
237		cp_coher_size = ((size + 255) >> 8);
238	}
239
240	uint32_t sync_flags = 0;
241
242	if ((flags & COMPUTE_RES_TC_FLUSH) == COMPUTE_RES_TC_FLUSH) {
243		sync_flags |= S_0085F0_TC_ACTION_ENA(1);
244	}
245
246	if ((flags & COMPUTE_RES_VC_FLUSH) == COMPUTE_RES_VC_FLUSH) {
247		sync_flags |= S_0085F0_VC_ACTION_ENA(1);
248	}
249
250	if ((flags & COMPUTE_RES_SH_FLUSH) == COMPUTE_RES_SH_FLUSH) {
251		sync_flags |= S_0085F0_SH_ACTION_ENA(1);
252	}
253
254	if ((flags & COMPUTE_RES_CB_FLUSH(0)) == COMPUTE_RES_CB_FLUSH(0)) {
255		sync_flags |= S_0085F0_CB_ACTION_ENA(1);
256
257		switch((flags >> 8) & 0xF) {
258		case 0:
259			sync_flags |= S_0085F0_CB0_DEST_BASE_ENA(1);
260			break;
261		case 1:
262			sync_flags |= S_0085F0_CB1_DEST_BASE_ENA(1);
263			break;
264		case 2:
265			sync_flags |= S_0085F0_CB2_DEST_BASE_ENA(1);
266			break;
267		case 3:
268			sync_flags |= S_0085F0_CB3_DEST_BASE_ENA(1);
269			break;
270		case 4:
271			sync_flags |= S_0085F0_CB4_DEST_BASE_ENA(1);
272			break;
273		case 5:
274			sync_flags |= S_0085F0_CB5_DEST_BASE_ENA(1);
275			break;
276		case 6:
277			sync_flags |= S_0085F0_CB6_DEST_BASE_ENA(1);
278			break;
279		case 7:
280			sync_flags |= S_0085F0_CB7_DEST_BASE_ENA(1);
281			break;
282		case 8:
283			sync_flags |= S_0085F0_CB8_DEST_BASE_ENA(1);
284			break;
285		case 9:
286			sync_flags |= S_0085F0_CB9_DEST_BASE_ENA(1);
287			break;
288		case 10:
289			sync_flags |= S_0085F0_CB10_DEST_BASE_ENA(1);
290			break;
291		case 11:
292			sync_flags |= S_0085F0_CB11_DEST_BASE_ENA(1);
293			break;
294		default:
295			assert(0);
296		}
297	}
298
299	int32_t poll_interval = 10;
300
301	ctx->cs->buf[ctx->cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
302	ctx->cs->buf[ctx->cs->cdw++] = sync_flags;
303	ctx->cs->buf[ctx->cs->cdw++] = cp_coher_size;
304	ctx->cs->buf[ctx->cs->cdw++] = 0;
305	ctx->cs->buf[ctx->cs->cdw++] = poll_interval;
306
307	if (cp_coher_size != 0xffffffff) {
308		evergreen_emit_ctx_reloc(ctx, bo, usage);
309	}
310}
311
312int evergreen_compute_get_gpu_format(
313	struct number_type_and_format* fmt,
314	struct r600_resource *bo)
315{
316	switch (bo->b.b.format)
317	{
318		case PIPE_FORMAT_R8_UNORM:
319		case PIPE_FORMAT_R32_UNORM:
320			fmt->format = V_028C70_COLOR_32;
321			fmt->number_type = V_028C70_NUMBER_UNORM;
322			fmt->num_format_all = 0;
323		break;
324		case PIPE_FORMAT_R32_FLOAT:
325			fmt->format = V_028C70_COLOR_32_FLOAT;
326			fmt->number_type = V_028C70_NUMBER_FLOAT;
327			fmt->num_format_all = 0;
328		break;
329		case PIPE_FORMAT_R32G32B32A32_FLOAT:
330			fmt->format = V_028C70_COLOR_32_32_32_32_FLOAT;
331			fmt->number_type = V_028C70_NUMBER_FLOAT;
332			fmt->num_format_all = 0;
333		break;
334
335		///TODO: other formats...
336
337		default:
338			return 0;
339	}
340
341	return 1;
342}
343
344void evergreen_set_rat(
345	struct r600_pipe_compute *pipe,
346	int id,
347	struct r600_resource* bo,
348	int start,
349	int size)
350{
351	assert(id < 12);
352	assert((size & 3) == 0);
353	assert((start & 0xFF) == 0);
354
355	int offset;
356	COMPUTE_DBG("bind rat: %i \n", id);
357
358	if (id < 8) {
359		offset = id*0x3c;
360	}
361	else {
362		offset = 8*0x3c + (id-8)*0x1c;
363	}
364
365	int linear = 0;
366
367	if (bo->b.b.height0 <= 1 && bo->b.b.depth0 <= 1
368			&& bo->b.b.target == PIPE_BUFFER) {
369		linear = 1;
370	}
371
372	struct evergreen_compute_resource* res =
373		get_empty_res(pipe, COMPUTE_RESOURCE_RAT, id);
374
375	evergreen_emit_force_reloc(res);
376
377	evergreen_reg_set(res, R_028C64_CB_COLOR0_PITCH, 0); ///TODO: for 2D?
378	evergreen_reg_set(res, R_028C68_CB_COLOR0_SLICE, 0);
379
380	struct number_type_and_format fmt;
381
382	///default config
383	if (bo->b.b.format == PIPE_FORMAT_NONE) {
384		 fmt.format = V_028C70_COLOR_32;
385		 fmt.number_type = V_028C70_NUMBER_FLOAT;
386	} else {
387		evergreen_compute_get_gpu_format(&fmt, bo);
388	}
389
390	if (linear) {
391		evergreen_reg_set(res,
392			R_028C70_CB_COLOR0_INFO, S_028C70_RAT(1)
393			| S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED)
394			| S_028C70_FORMAT(fmt.format)
395			| S_028C70_NUMBER_TYPE(fmt.number_type)
396		);
397		evergreen_emit_force_reloc(res);
398	} else {
399		assert(0 && "TODO");
400		///TODO
401//	 evergreen_reg_set(res, R_028C70_CB_COLOR0_INFO, S_028C70_RAT(1) | S_028C70_ARRAY_MODE(????));
402//	 evergreen_emit_force_reloc(res);
403	}
404
405	evergreen_reg_set(res, R_028C74_CB_COLOR0_ATTRIB, S_028C74_NON_DISP_TILING_ORDER(1));
406	evergreen_emit_force_reloc(res);
407
408	if (linear) {
409		/* XXX: Why are we using size instead of bo->b.b.b.width0 ? */
410		evergreen_reg_set(res, R_028C78_CB_COLOR0_DIM, size);
411	} else {
412		evergreen_reg_set(res, R_028C78_CB_COLOR0_DIM,
413			S_028C78_WIDTH_MAX(bo->b.b.width0)
414			| S_028C78_HEIGHT_MAX(bo->b.b.height0));
415	}
416
417	if (id < 8) {
418		evergreen_reg_set(res, R_028C7C_CB_COLOR0_CMASK, 0);
419		evergreen_emit_force_reloc(res);
420		evergreen_reg_set(res, R_028C84_CB_COLOR0_FMASK, 0);
421		evergreen_emit_force_reloc(res);
422	}
423
424	evergreen_reg_set(res, R_028C60_CB_COLOR0_BASE + offset, start >> 8);
425
426	res->bo = bo;
427	res->usage = RADEON_USAGE_READWRITE;
428	res->coher_bo_size = size;
429	res->flags = COMPUTE_RES_CB_FLUSH(id);
430}
431
432void evergreen_set_lds(
433	struct r600_pipe_compute *pipe,
434	int num_lds,
435	int size,
436	int num_waves)
437{
438	struct evergreen_compute_resource* res =
439		get_empty_res(pipe, COMPUTE_RESOURCE_LDS, 0);
440
441	if (pipe->ctx->chip_class < CAYMAN) {
442		evergreen_reg_set(res, R_008E2C_SQ_LDS_RESOURCE_MGMT,
443			S_008E2C_NUM_LS_LDS(num_lds));
444	} else {
445		evergreen_reg_set(res, CM_R_0286FC_SPI_LDS_MGMT,
446					S_0286FC_NUM_LS_LDS(num_lds));
447	}
448	evergreen_reg_set(res, CM_R_0288E8_SQ_LDS_ALLOC, size | num_waves << 14);
449}
450
451void evergreen_set_gds(
452	struct r600_pipe_compute *pipe,
453	uint32_t addr,
454	uint32_t size)
455{
456	struct evergreen_compute_resource* res =
457		get_empty_res(pipe, COMPUTE_RESOURCE_GDS, 0);
458
459	evergreen_reg_set(res, R_028728_GDS_ORDERED_WAVE_PER_SE, 1);
460	evergreen_reg_set(res, R_028720_GDS_ADDR_BASE, addr);
461	evergreen_reg_set(res, R_028724_GDS_ADDR_SIZE, size);
462}
463
464void evergreen_set_export(
465	struct r600_pipe_compute *pipe,
466	struct r600_resource* bo,
467	int offset, int size)
468{
469	#define SX_MEMORY_EXPORT_BASE 0x9010
470	#define SX_MEMORY_EXPORT_SIZE 0x9014
471
472	struct evergreen_compute_resource* res =
473		get_empty_res(pipe, COMPUTE_RESOURCE_EXPORT, 0);
474
475	evergreen_reg_set(res, SX_MEMORY_EXPORT_SIZE, size);
476
477	if (size) {
478		evergreen_reg_set(res, SX_MEMORY_EXPORT_BASE, offset);
479		res->bo = bo;
480		res->usage = RADEON_USAGE_WRITE;
481		res->coher_bo_size = size;
482		res->flags = 0;
483	}
484}
485
486void evergreen_set_loop_const(
487	struct r600_pipe_compute *pipe,
488	int id, int count, int init, int inc) {
489
490	struct evergreen_compute_resource* res =
491		get_empty_res(pipe, COMPUTE_RESOURCE_LOOP, id);
492
493	assert(id < 32);
494	assert(count <= 0xFFF);
495	assert(init <= 0xFF);
496	assert(inc <= 0xFF);
497
498	/* Compute shaders use LOOP_CONST registers SQ_LOOP_CONST_160 to
499         * SQ_LOOP_CONST_191 */
500	evergreen_reg_set(res, R_03A200_SQ_LOOP_CONST_0 + (160 * 4) + (id * 4),
501		count | init << 12 | inc << 24);
502}
503
504void evergreen_set_tmp_ring(
505	struct r600_pipe_compute *pipe,
506	struct r600_resource* bo,
507	int offset, int size, int se)
508{
509	#define SQ_LSTMP_RING_BASE 0x00008e10
510	#define SQ_LSTMP_RING_SIZE 0x00008e14
511	#define GRBM_GFX_INDEX                                  0x802C
512	#define         INSTANCE_INDEX(x)                       ((x) << 0)
513	#define         SE_INDEX(x)                             ((x) << 16)
514	#define         INSTANCE_BROADCAST_WRITES               (1 << 30)
515	#define         SE_BROADCAST_WRITES                     (1 << 31)
516
517	struct evergreen_compute_resource* res =
518		get_empty_res(pipe, COMPUTE_RESOURCE_TMPRING, se);
519
520	evergreen_reg_set(res,
521		GRBM_GFX_INDEX,INSTANCE_INDEX(0)
522		| SE_INDEX(se)
523		| INSTANCE_BROADCAST_WRITES);
524	evergreen_reg_set(res, SQ_LSTMP_RING_SIZE, size);
525
526	if (size) {
527		assert(bo);
528
529		evergreen_reg_set(res, SQ_LSTMP_RING_BASE, offset);
530		res->bo = bo;
531		res->usage = RADEON_USAGE_WRITE;
532		res->coher_bo_size = 0;
533		res->flags = 0;
534	}
535
536	if (size) {
537		evergreen_emit_force_reloc(res);
538	}
539
540	evergreen_reg_set(res,
541		GRBM_GFX_INDEX,INSTANCE_INDEX(0)
542		| SE_INDEX(0)
543		| INSTANCE_BROADCAST_WRITES
544		| SE_BROADCAST_WRITES);
545}
546
547static uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
548{
549	if (R600_BIG_ENDIAN) {
550		switch(colorformat) {
551		case V_028C70_COLOR_4_4:
552			return ENDIAN_NONE;
553
554		/* 8-bit buffers. */
555		case V_028C70_COLOR_8:
556			return ENDIAN_NONE;
557
558		/* 16-bit buffers. */
559		case V_028C70_COLOR_5_6_5:
560		case V_028C70_COLOR_1_5_5_5:
561		case V_028C70_COLOR_4_4_4_4:
562		case V_028C70_COLOR_16:
563		case V_028C70_COLOR_8_8:
564			return ENDIAN_8IN16;
565
566		/* 32-bit buffers. */
567		case V_028C70_COLOR_8_8_8_8:
568		case V_028C70_COLOR_2_10_10_10:
569		case V_028C70_COLOR_8_24:
570		case V_028C70_COLOR_24_8:
571		case V_028C70_COLOR_32_FLOAT:
572		case V_028C70_COLOR_16_16_FLOAT:
573		case V_028C70_COLOR_16_16:
574			return ENDIAN_8IN32;
575
576		/* 64-bit buffers. */
577		case V_028C70_COLOR_16_16_16_16:
578		case V_028C70_COLOR_16_16_16_16_FLOAT:
579			return ENDIAN_8IN16;
580
581		case V_028C70_COLOR_32_32_FLOAT:
582		case V_028C70_COLOR_32_32:
583		case V_028C70_COLOR_X24_8_32_FLOAT:
584			return ENDIAN_8IN32;
585
586		/* 96-bit buffers. */
587		case V_028C70_COLOR_32_32_32_FLOAT:
588		/* 128-bit buffers. */
589		case V_028C70_COLOR_32_32_32_32_FLOAT:
590		case V_028C70_COLOR_32_32_32_32:
591			return ENDIAN_8IN32;
592		default:
593			return ENDIAN_NONE; /* Unsupported. */
594		}
595	} else {
596		return ENDIAN_NONE;
597	}
598}
599
600static unsigned r600_tex_dim(unsigned dim)
601{
602	switch (dim) {
603	default:
604	case PIPE_TEXTURE_1D:
605		return V_030000_SQ_TEX_DIM_1D;
606	case PIPE_TEXTURE_1D_ARRAY:
607		return V_030000_SQ_TEX_DIM_1D_ARRAY;
608	case PIPE_TEXTURE_2D:
609	case PIPE_TEXTURE_RECT:
610		return V_030000_SQ_TEX_DIM_2D;
611	case PIPE_TEXTURE_2D_ARRAY:
612		return V_030000_SQ_TEX_DIM_2D_ARRAY;
613	case PIPE_TEXTURE_3D:
614		return V_030000_SQ_TEX_DIM_3D;
615	case PIPE_TEXTURE_CUBE:
616		return V_030000_SQ_TEX_DIM_CUBEMAP;
617	}
618}
619
620void evergreen_set_vtx_resource(
621	struct r600_pipe_compute *pipe,
622	struct r600_resource* bo,
623	int id, uint64_t offset, int writable)
624{
625	assert(id < 16);
626	uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
627	struct number_type_and_format fmt;
628	uint64_t va;
629
630	fmt.format = 0;
631
632	assert(bo->b.b.height0 <= 1);
633	assert(bo->b.b.depth0 <= 1);
634
635	int e = evergreen_compute_get_gpu_format(&fmt, bo);
636
637	assert(e && "unknown format");
638
639	struct evergreen_compute_resource* res =
640		get_empty_res(pipe, COMPUTE_RESOURCE_VERT, id);
641
642	unsigned size = bo->b.b.width0;
643	unsigned stride = 1;
644
645//	size = (size * util_format_get_blockwidth(bo->b.b.b.format) *
646//		util_format_get_blocksize(bo->b.b.b.format));
647
648	va = r600_resource_va(&pipe->ctx->screen->screen, &bo->b.b) + offset;
649
650	COMPUTE_DBG("id: %i vtx size: %i byte,	width0: %i elem\n",
651		id, size, bo->b.b.width0);
652
653	sq_vtx_constant_word2 =
654		S_030008_BASE_ADDRESS_HI(va >> 32) |
655		S_030008_STRIDE(stride) |
656		S_030008_DATA_FORMAT(fmt.format) |
657		S_030008_NUM_FORMAT_ALL(fmt.num_format_all) |
658		S_030008_ENDIAN_SWAP(0);
659
660	COMPUTE_DBG("%08X %i %i %i %i\n", sq_vtx_constant_word2, offset,
661			stride, fmt.format, fmt.num_format_all);
662
663	sq_vtx_constant_word3 =
664		S_03000C_DST_SEL_X(0) |
665		S_03000C_DST_SEL_Y(1) |
666		S_03000C_DST_SEL_Z(2) |
667		S_03000C_DST_SEL_W(3);
668
669	sq_vtx_constant_word4 = 0;
670
671	evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0));
672	evergreen_emit_raw_value(res, (id+816)*32 >> 2);
673	evergreen_emit_raw_value(res, (unsigned)((va) & 0xffffffff));
674	evergreen_emit_raw_value(res, size - 1);
675	evergreen_emit_raw_value(res, sq_vtx_constant_word2);
676	evergreen_emit_raw_value(res, sq_vtx_constant_word3);
677	evergreen_emit_raw_value(res, sq_vtx_constant_word4);
678	evergreen_emit_raw_value(res, 0);
679	evergreen_emit_raw_value(res, 0);
680	evergreen_emit_raw_value(res, S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));
681
682	res->bo = bo;
683
684	if (writable) {
685		res->usage = RADEON_USAGE_READWRITE;
686	}
687	else {
688		res->usage = RADEON_USAGE_READ;
689	}
690
691	res->coher_bo_size = size;
692	res->flags = COMPUTE_RES_TC_FLUSH | COMPUTE_RES_VC_FLUSH;
693}
694
695void evergreen_set_tex_resource(
696	struct r600_pipe_compute *pipe,
697	struct r600_pipe_sampler_view* view,
698	int id)
699{
700	struct evergreen_compute_resource* res =
701		get_empty_res(pipe, COMPUTE_RESOURCE_TEX, id);
702	struct r600_resource_texture *tmp =
703		(struct r600_resource_texture*)view->base.texture;
704
705	unsigned format, endian;
706	uint32_t word4 = 0, yuv_format = 0, pitch = 0;
707	unsigned char swizzle[4], array_mode = 0, tile_type = 0;
708	unsigned height, depth;
709
710	swizzle[0] = 0;
711	swizzle[1] = 1;
712	swizzle[2] = 2;
713	swizzle[3] = 3;
714
715	format = r600_translate_texformat((struct pipe_screen *)pipe->ctx->screen,
716		view->base.format, swizzle, &word4, &yuv_format);
717
718	if (format == ~0) {
719		format = 0;
720	}
721
722	endian = r600_colorformat_endian_swap(format);
723
724	height = view->base.texture->height0;
725	depth = view->base.texture->depth0;
726
727	pitch = align(tmp->pitch_in_blocks[0] *
728		util_format_get_blockwidth(tmp->real_format), 8);
729	array_mode = tmp->array_mode[0];
730	tile_type = tmp->tile_type;
731
732	assert(view->base.texture->target != PIPE_TEXTURE_1D_ARRAY);
733	assert(view->base.texture->target != PIPE_TEXTURE_2D_ARRAY);
734
735	evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0));
736	evergreen_emit_raw_value(res, (id+816)*32 >> 2); ///TODO: check this line
737	evergreen_emit_raw_value(res,
738				(S_030000_DIM(r600_tex_dim(view->base.texture->target)) |
739				S_030000_PITCH((pitch / 8) - 1) |
740				S_030000_NON_DISP_TILING_ORDER(tile_type) |
741				S_030000_TEX_WIDTH(view->base.texture->width0 - 1)));
742	evergreen_emit_raw_value(res, (S_030004_TEX_HEIGHT(height - 1) |
743				S_030004_TEX_DEPTH(depth - 1) |
744				S_030004_ARRAY_MODE(array_mode)));
745	evergreen_emit_raw_value(res, tmp->offset[0] >> 8);
746	evergreen_emit_raw_value(res, tmp->offset[0] >> 8);
747	evergreen_emit_raw_value(res, (word4 |
748				S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
749				S_030010_ENDIAN_SWAP(endian) |
750				S_030010_BASE_LEVEL(0)));
751	evergreen_emit_raw_value(res, (S_030014_LAST_LEVEL(0) |
752				S_030014_BASE_ARRAY(0) |
753				S_030014_LAST_ARRAY(0)));
754	evergreen_emit_raw_value(res, (S_030018_MAX_ANISO(4 /* max 16 samples */)));
755	evergreen_emit_raw_value(res,
756		S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE)
757		| S_03001C_DATA_FORMAT(format));
758
759	res->bo = (struct r600_resource*)view->base.texture;
760
761	res->usage = RADEON_USAGE_READ;
762
763	res->coher_bo_size = tmp->offset[0] + util_format_get_blockwidth(tmp->real_format)*view->base.texture->width0*height*depth;
764	res->flags = COMPUTE_RES_TC_FLUSH;
765
766	evergreen_emit_force_reloc(res);
767	evergreen_emit_force_reloc(res);
768}
769
770void evergreen_set_sampler_resource(
771	struct r600_pipe_compute *pipe,
772	struct compute_sampler_state *sampler,
773	int id)
774{
775	struct evergreen_compute_resource* res =
776		get_empty_res(pipe, COMPUTE_RESOURCE_SAMPLER, id);
777
778	unsigned aniso_flag_offset = sampler->state.max_anisotropy > 1 ? 2 : 0;
779
780	evergreen_emit_raw_value(res, PKT3C(PKT3_SET_SAMPLER, 3, 0));
781	evergreen_emit_raw_value(res, (id + 90)*3);
782	evergreen_emit_raw_value(res,
783		S_03C000_CLAMP_X(r600_tex_wrap(sampler->state.wrap_s)) |
784		S_03C000_CLAMP_Y(r600_tex_wrap(sampler->state.wrap_t)) |
785		S_03C000_CLAMP_Z(r600_tex_wrap(sampler->state.wrap_r)) |
786		S_03C000_XY_MAG_FILTER(r600_tex_filter(sampler->state.mag_img_filter) | aniso_flag_offset) |
787		S_03C000_XY_MIN_FILTER(r600_tex_filter(sampler->state.min_img_filter) | aniso_flag_offset) |
788		S_03C000_BORDER_COLOR_TYPE(V_03C000_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK)
789	);
790	evergreen_emit_raw_value(res,
791		S_03C004_MIN_LOD(S_FIXED(CLAMP(sampler->state.min_lod, 0, 15), 8)) |
792		S_03C004_MAX_LOD(S_FIXED(CLAMP(sampler->state.max_lod, 0, 15), 8))
793	);
794	evergreen_emit_raw_value(res,
795		S_03C008_LOD_BIAS(S_FIXED(CLAMP(sampler->state.lod_bias, -16, 16), 8)) |
796		(sampler->state.seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) |
797		S_03C008_TYPE(1)
798	);
799}
800
801void evergreen_set_const_cache(
802	struct r600_pipe_compute *pipe,
803	int cache_id,
804	struct r600_resource* cbo,
805	int size, int offset)
806{
807	#define SQ_ALU_CONST_BUFFER_SIZE_LS_0 0x00028fc0
808	#define SQ_ALU_CONST_CACHE_LS_0 0x00028f40
809
810	struct evergreen_compute_resource* res =
811		get_empty_res(pipe, COMPUTE_RESOURCE_CONST_MEM, cache_id);
812
813	assert(size < 0x200);
814	assert((offset & 0xFF) == 0);
815	assert(cache_id < 16);
816
817	evergreen_reg_set(res, SQ_ALU_CONST_BUFFER_SIZE_LS_0 + cache_id*4, size);
818	evergreen_reg_set(res, SQ_ALU_CONST_CACHE_LS_0 + cache_id*4, offset >> 8);
819	res->bo = cbo;
820	res->usage = RADEON_USAGE_READ;
821	res->coher_bo_size = size;
822	res->flags = COMPUTE_RES_SH_FLUSH;
823}
824
825struct r600_resource* r600_compute_buffer_alloc_vram(
826	struct r600_screen *screen,
827	unsigned size)
828{
829	assert(size);
830
831	struct pipe_resource * buffer = pipe_buffer_create(
832			(struct pipe_screen*) screen,
833			PIPE_BIND_CUSTOM,
834			PIPE_USAGE_IMMUTABLE,
835			size);
836
837	return (struct r600_resource *)buffer;
838}
839