1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *      Christian König <christian.koenig@amd.com>
25 */
26
27#include "radeonsi_pipe.h"
28#include "si_state.h"
29
30/*
31 * Stream out
32 */
33
34#if 0
/*
 * Start streamout (transform feedback): program the stream-out buffer
 * registers and enable the VGT stream-out unit for every bound target.
 *
 * NOTE(review): this function sits inside the file-wide "#if 0" and is
 * never compiled.  It is an in-progress port from the r600 driver --
 * hence the r600_context parameter and the evergreen_* helper calls --
 * and still needs to be converted to SI.
 */
void si_context_streamout_begin(struct r600_context *ctx)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	struct si_so_target **t = ctx->so_targets;
	unsigned *strides = ctx->vs_shader_so_strides;
	unsigned buffer_en, i;

	/* Bit i is set iff stream-out target i (of at most 4) is bound. */
	buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
		    (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) |
		    (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) |
		    (ctx->num_so_targets >= 4 && t[3] ? 8 : 0);

	/* Record how many CS dwords si_context_streamout_end will emit, so
	 * space for it can be reserved up front and ending cannot fail. */
	ctx->num_cs_dw_streamout_end =
		12 + /* flush_vgt_streamout */
		util_bitcount(buffer_en) * 8 +
		3;

	/* Reserve space for both begin and matching end packets; targets
	 * that append need 8 dwords each, fresh ones only 6. */
	si_need_cs_space(ctx,
			   12 + /* flush_vgt_streamout */
			   6 + /* enables */
			   util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 +
			   util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 +
			   ctx->num_cs_dw_streamout_end, TRUE);

	/* NOTE(review): leftover r600/cayman condition -- presumably always
	 * true on SI; confirm once the port is finished. */
	if (ctx->chip_class >= CAYMAN) {
		evergreen_flush_vgt_streamout(ctx);
		evergreen_set_streamout_enable(ctx, buffer_en);
	}

	/* Per-buffer register/reloc emission below is additionally disabled
	 * by its own nested #if 0 (packets not ported to SI yet). */
	for (i = 0; i < ctx->num_so_targets; i++) {
#if 0
		if (t[i]) {
			t[i]->stride = strides[i];
			t[i]->so_index = i;

			/* Program BUFFER_SIZE, VTX_STRIDE and BUFFER_BASE for
			 * stream-out buffer i (sizes/strides in dwords). */
			cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0);
			cs->buf[cs->cdw++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 +
							16*i - SI_CONTEXT_REG_OFFSET) >> 2;
			cs->buf[cs->cdw++] = (t[i]->b.buffer_offset +
							t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */
			cs->buf[cs->cdw++] = strides[i] >> 2;		   /* VTX_STRIDE (in DW) */
			cs->buf[cs->cdw++] = 0;			   /* BUFFER_BASE */

			/* Relocation for the destination buffer (GPU writes it). */
			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
			cs->buf[cs->cdw++] =
				si_context_bo_reloc(ctx, si_resource(t[i]->b.buffer),
						      RADEON_USAGE_WRITE);

			if (ctx->streamout_append_bitmask & (1 << i)) {
				/* Append. */
				cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
				cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
							       STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */
				cs->buf[cs->cdw++] = 0; /* unused */
				cs->buf[cs->cdw++] = 0; /* unused */
				cs->buf[cs->cdw++] = 0; /* src address lo */
				cs->buf[cs->cdw++] = 0; /* src address hi */

				/* The saved BUFFER_FILLED_SIZE is read back so
				 * writing resumes where the last pass stopped. */
				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
				cs->buf[cs->cdw++] =
					si_context_bo_reloc(ctx,  t[i]->filled_size,
							      RADEON_USAGE_READ);
			} else {
				/* Start from the beginning. */
				cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
				cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
							       STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */
				cs->buf[cs->cdw++] = 0; /* unused */
				cs->buf[cs->cdw++] = 0; /* unused */
				cs->buf[cs->cdw++] = t[i]->b.buffer_offset >> 2; /* buffer offset in DW */
				cs->buf[cs->cdw++] = 0; /* unused */
			}
		}
#endif
	}
}
111
/*
 * Stop streamout: flush the VGT stream-out unit, have the CP store each
 * buffer's FILLED_SIZE into its tracking BO, then disable streamout and
 * schedule a surface-sync flush so the written data becomes visible.
 *
 * NOTE(review): compiled out by the file-wide #if 0; still mid-port from
 * r600 (r600_context parameter, evergreen_* helpers).
 */
void si_context_streamout_end(struct r600_context *ctx)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	struct si_so_target **t = ctx->so_targets;
	unsigned i, flush_flags = 0;

	evergreen_flush_vgt_streamout(ctx);

	/* Per-buffer FILLED_SIZE stores are disabled by the nested #if 0
	 * (packet emission not ported to SI yet), so flush_flags stays 0. */
	for (i = 0; i < ctx->num_so_targets; i++) {
#if 0
		if (t[i]) {
			/* Ask the CP to store the current buffer-filled size;
			 * the dst address is patched in via the NOP reloc. */
			cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
			cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
						       STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
						       STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */
			cs->buf[cs->cdw++] = 0; /* dst address lo */
			cs->buf[cs->cdw++] = 0; /* dst address hi */
			cs->buf[cs->cdw++] = 0; /* unused */
			cs->buf[cs->cdw++] = 0; /* unused */

			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
			cs->buf[cs->cdw++] =
				si_context_bo_reloc(ctx,  t[i]->filled_size,
						      RADEON_USAGE_WRITE);

			flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i;
		}
#endif
	}

	evergreen_set_streamout_enable(ctx, 0);

	/* Flush the stream-out destinations via the surface-sync atom. */
	ctx->atom_surface_sync.flush_flags |= flush_flags;
	si_atom_dirty(ctx, &ctx->atom_surface_sync.atom);

	ctx->num_cs_dw_streamout_end = 0;

	/* XXX print some debug info */
	/* NOTE(review): debug-only readback; mapping a BO still referenced
	 * by the CS presumably stalls -- remove before release. */
	for (i = 0; i < ctx->num_so_targets; i++) {
		if (!t[i])
			continue;

		uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->cs_buf, ctx->cs, RADEON_USAGE_READ);
		printf("FILLED_SIZE%i: %u\n", i, *ptr);
		ctx->ws->buffer_unmap(t[i]->filled_size->cs_buf);
	}
}
159
/*
 * Flush the VGT stream-out unit and wait for the flush to complete:
 * clear CP_STRMOUT_CNTL, fire a SO_VGTSTREAMOUT_FLUSH event, then
 * WAIT_REG_MEM-poll until the CP sets OFFSET_UPDATE_DONE.
 *
 * NOTE(review): compiled out by the file-wide #if 0.  The "evergreen_"
 * prefix and the si_context parameter (callers here pass r600_context)
 * are leftovers of the unfinished r600 -> SI port.
 */
void evergreen_flush_vgt_streamout(struct si_context *ctx)
{
	struct radeon_winsys_cs *cs = ctx->cs;

	/* Clear the register so OFFSET_UPDATE_DONE starts out deasserted. */
	cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
	cs->buf[cs->cdw++] = (R_0084FC_CP_STRMOUT_CNTL - SI_CONFIG_REG_OFFSET) >> 2;
	cs->buf[cs->cdw++] = 0;

	cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
	cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0);

	cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0);
	cs->buf[cs->cdw++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */
	cs->buf[cs->cdw++] = R_0084FC_CP_STRMOUT_CNTL >> 2;  /* register */
	cs->buf[cs->cdw++] = 0;
	cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* reference value */
	cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* mask */
	cs->buf[cs->cdw++] = 4; /* poll interval */
}
179
/*
 * Enable or disable the VGT stream-out unit.
 *
 * buffer_enable_bit: per-buffer enable mask for stream 0 (bit i enables
 * stream-out buffer i); 0 disables streamout entirely.
 *
 * NOTE(review): compiled out by the file-wide #if 0; only stream 0 is
 * programmed here, and the "evergreen_" prefix is an r600 leftover.
 */
void evergreen_set_streamout_enable(struct si_context *ctx, unsigned buffer_enable_bit)
{
	struct radeon_winsys_cs *cs = ctx->cs;

	if (buffer_enable_bit) {
		cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
		cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
		cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(1);

		cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
		cs->buf[cs->cdw++] = (R_028B98_VGT_STRMOUT_BUFFER_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
		cs->buf[cs->cdw++] = S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit);
	} else {
		cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
		cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - SI_CONTEXT_REG_OFFSET) >> 2;
		cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(0);
	}
}
198
199#endif
200
201struct pipe_stream_output_target *
202si_create_so_target(struct pipe_context *ctx,
203		    struct pipe_resource *buffer,
204		    unsigned buffer_offset,
205		    unsigned buffer_size)
206{
207#if 0
208	struct si_context *rctx = (struct r600_context *)ctx;
209	struct si_so_target *t;
210	void *ptr;
211
212	t = CALLOC_STRUCT(si_so_target);
213	if (!t) {
214		return NULL;
215	}
216
217	t->b.reference.count = 1;
218	t->b.context = ctx;
219	pipe_resource_reference(&t->b.buffer, buffer);
220	t->b.buffer_offset = buffer_offset;
221	t->b.buffer_size = buffer_size;
222
223	t->filled_size = si_resource_create_custom(ctx->screen, PIPE_USAGE_STATIC, 4);
224	ptr = rctx->ws->buffer_map(t->filled_size->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
225	memset(ptr, 0, t->filled_size->buf->size);
226	rctx->ws->buffer_unmap(t->filled_size->cs_buf);
227
228	return &t->b;
229#endif
230	return NULL;
231}
232
/*
 * Destroy a stream-output target created by si_create_so_target.
 *
 * NOTE(review): disabled along with the rest of streamout; since
 * si_create_so_target currently always returns NULL, there is nothing
 * to free yet and this is intentionally a no-op.
 */
void si_so_target_destroy(struct pipe_context *ctx,
			  struct pipe_stream_output_target *target)
{
#if 0
	/* Fixed: cast now matches the declared type (was a stale
	 * r600_so_target cast left over from the r600 driver). */
	struct si_so_target *t = (struct si_so_target *)target;
	pipe_resource_reference(&t->b.buffer, NULL);
	si_resource_reference(&t->filled_size, NULL);
	FREE(t);
#endif
}
243
/*
 * Bind the set of stream-output targets used by subsequent draws.
 *
 * \param num_targets     number of entries in \p targets
 * \param targets         targets to bind (entries may be NULL)
 * \param append_bitmask  bit i set = resume writing target i where the
 *                        previous streamout pass left off
 *
 * NOTE(review): streamout is not implemented for SI yet; the disabled
 * body below is the r600-derived implementation.  The assert documents
 * that no caller may bind targets until the port is finished.
 */
void si_set_so_targets(struct pipe_context *ctx,
		       unsigned num_targets,
		       struct pipe_stream_output_target **targets,
		       unsigned append_bitmask)
{
	assert(num_targets == 0);
#if 0
	/* Fixed: cast now matches the declared type (was a stale
	 * r600_context cast left over from the r600 driver). */
	struct si_context *rctx = (struct si_context *)ctx;
	unsigned i;

	/* Stop streamout. */
	if (rctx->num_so_targets) {
		si_context_streamout_end(rctx);
	}

	/* Set the new targets. */
	for (i = 0; i < num_targets; i++) {
		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]);
	}
	/* Drop references to previously bound targets beyond the new count. */
	for (; i < rctx->num_so_targets; i++) {
		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL);
	}

	rctx->num_so_targets = num_targets;
	rctx->streamout_start = num_targets != 0;
	rctx->streamout_append_bitmask = append_bitmask;
#endif
}
272