/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */
28
29#ifndef FREEDRENO_UTIL_H_
30#define FREEDRENO_UTIL_H_
31
32#include <freedreno_drmif.h>
33#include <freedreno_ringbuffer.h>
34
35#include "pipe/p_format.h"
36#include "pipe/p_state.h"
37#include "util/u_debug.h"
38#include "util/u_math.h"
39#include "util/u_half.h"
40#include "util/u_dynarray.h"
41#include "util/u_pack_color.h"
42
43#include "disasm.h"
44#include "adreno_common.xml.h"
45#include "adreno_pm4.xml.h"
46
/*
 * Translation helpers; only the prototypes are visible in this header.
 * NOTE(review): judging by the signatures, each one maps a gallium-side
 * value (a pipe_format, or a blend-factor / polygon-mode / stencil-op code)
 * to the matching adreno enum, and fd_gmem_restore_format() maps one
 * pipe_format to another -- confirm against the definitions.
 */
enum adreno_rb_depth_format fd_pipe2depth(enum pipe_format format);
enum pc_di_index_size fd_pipe2index(enum pipe_format format);
enum pipe_format fd_gmem_restore_format(enum pipe_format format);
enum adreno_rb_blend_factor fd_blend_factor(unsigned factor);
enum adreno_pa_su_sc_draw fd_polygon_mode(unsigned mode);
enum adreno_stencil_op fd_stencil_op(unsigned op);
53
/* Mip-level limit; only an a3xx value is defined so far. */
#define A3XX_MAX_MIP_LEVELS 14
/* TBD if it is same on a2xx, but for now: */
#define MAX_MIP_LEVELS A3XX_MAX_MIP_LEVELS

/* Render-target limits, one per GPU generation prefix. */
#define A2XX_MAX_RENDER_TARGETS 1
#define A3XX_MAX_RENDER_TARGETS 4
#define A4XX_MAX_RENDER_TARGETS 8
#define A5XX_MAX_RENDER_TARGETS 8

/* Generation-independent limit: aliases the a5xx value (8), which is the
 * largest of the per-generation values above.
 */
#define MAX_RENDER_TARGETS A5XX_MAX_RENDER_TARGETS
64
/*
 * Debug flag bits.  Each value is a distinct single bit so the flags can be
 * combined in one mask; FD_DBG_MSGS is the bit that DBG() tests against
 * fd_mesa_debug.
 */
#define FD_DBG_MSGS     0x0001
#define FD_DBG_DISASM   0x0002
#define FD_DBG_DCLEAR   0x0004
#define FD_DBG_DDRAW    0x0008
#define FD_DBG_NOSCIS   0x0010
#define FD_DBG_DIRECT   0x0020
#define FD_DBG_NOBYPASS 0x0040
#define FD_DBG_FRAGHALF 0x0080
#define FD_DBG_NOBIN    0x0100
#define FD_DBG_OPTMSGS  0x0200
#define FD_DBG_GLSL120  0x0400
#define FD_DBG_SHADERDB 0x0800
#define FD_DBG_FLUSH    0x1000
#define FD_DBG_DEQP     0x2000
#define FD_DBG_NIR      0x4000
#define FD_DBG_REORDER  0x8000
#define FD_DBG_BSTAT   0x10000
#define FD_DBG_NOGROW  0x20000
83
/* Globals defined outside this header. */
extern int fd_mesa_debug;       /* mask of FD_DBG_* bits; DBG() tests FD_DBG_MSGS in it */
extern bool fd_binning_enabled; /* NOTE(review): presumably tied to FD_DBG_NOBIN -- confirm at the definition */
86
/*
 * Debug print: when FD_DBG_MSGS is set in fd_mesa_debug, calls
 * debug_printf() with the message prefixed by the calling function name
 * and line number and followed by a newline.
 *
 * 'fmt' must be a string literal, because it is joined to the prefix and
 * the trailing "\n" by adjacent-literal concatenation.  The
 * do { } while (0) wrapper makes the macro usable as a single statement.
 */
#define DBG(fmt, ...) \
		do { if (fd_mesa_debug & FD_DBG_MSGS) \
			debug_printf("%s:%d: "fmt "\n", \
				__FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
91
/* for conditionally setting flag(s): evaluates to 'val' when 'cond' is
 * true, and to 0 otherwise:
 */
#define COND(cond, val) ((cond) ? (val) : 0)
94
/* Combines the constant (0x4 << 16) with the register number rebased to
 * 0x2000 (taken as an unsigned int):
 */
#define CP_REG(reg) (((unsigned int)((reg) - (0x2000))) | (0x4 << 16))
96
97static inline uint32_t DRAW(enum pc_di_primtype prim_type,
98		enum pc_di_src_sel source_select, enum pc_di_index_size index_size,
99		enum pc_di_vis_cull_mode vis_cull_mode,
100		uint8_t instances)
101{
102	return (prim_type         << 0) |
103			(source_select     << 6) |
104			((index_size & 1)  << 11) |
105			((index_size >> 1) << 13) |
106			(vis_cull_mode     << 9) |
107			(1                 << 14) |
108			(instances         << 24);
109}
110
/* for tracking cmdstream positions that need to be patched: */
struct fd_cs_patch {
	uint32_t *cs;   /* the cmdstream dword to patch (OUT_RINGP() stores ring->cur here) */
	uint32_t val;   /* value recorded with it (OUT_RINGP()'s 'data' argument) */
};
/* number of fd_cs_patch entries in a util_dynarray, derived from its byte size: */
#define fd_patch_num_elements(buf) ((buf)->size / sizeof(struct fd_cs_patch))
/* accesses entry 'i' of the dynarray through util_dynarray_element(): */
#define fd_patch_element(buf, i)   util_dynarray_element(buf, struct fd_cs_patch, i)
118
119static inline enum pipe_format
120pipe_surface_format(struct pipe_surface *psurf)
121{
122	if (!psurf)
123		return PIPE_FORMAT_NONE;
124	return psurf->format;
125}
126
127static inline bool
128fd_surface_half_precision(const struct pipe_surface *psurf)
129{
130	enum pipe_format format;
131
132	if (!psurf)
133		return true;
134
135	format = psurf->format;
136
137	/* colors are provided in consts, which go through cov.f32f16, which will
138	 * break these values
139	 */
140	if (util_format_is_pure_integer(format))
141		return false;
142
143	/* avoid losing precision on 32-bit float formats */
144	if (util_format_is_float(format) &&
145		util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) == 32)
146		return false;
147
148	return true;
149}
150
151static inline unsigned
152fd_sampler_first_level(const struct pipe_sampler_view *view)
153{
154	if (view->target == PIPE_BUFFER)
155		return 0;
156	return view->u.tex.first_level;
157}
158
159static inline unsigned
160fd_sampler_last_level(const struct pipe_sampler_view *view)
161{
162	if (view->target == PIPE_BUFFER)
163		return 0;
164	return view->u.tex.last_level;
165}
166
167static inline bool
168fd_half_precision(struct pipe_framebuffer_state *pfb)
169{
170	unsigned i;
171
172	for (i = 0; i < pfb->nr_cbufs; i++)
173		if (!fd_surface_half_precision(pfb->cbufs[i]))
174			return false;
175
176	return true;
177}
178
/* Compile-time switch: when non-zero, the OUT_RING*()/OUT_RELOC*() helpers
 * below log what they emit through DBG().
 */
#define LOG_DWORDS 0

/* Forward declarations: __OUT_IB()/__OUT_IB5() call these before their
 * definitions further down in this header.
 */
static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx);
static inline void emit_marker5(struct fd_ringbuffer *ring, int scratch_idx);
183
184static inline void
185OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
186{
187	if (LOG_DWORDS) {
188		DBG("ring[%p]: OUT_RING   %04x:  %08x", ring,
189				(uint32_t)(ring->cur - ring->last_start), data);
190	}
191	fd_ringbuffer_emit(ring, data);
192}
193
194/* like OUT_RING() but appends a cmdstream patch point to 'buf' */
195static inline void
196OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data,
197		struct util_dynarray *buf)
198{
199	if (LOG_DWORDS) {
200		DBG("ring[%p]: OUT_RINGP  %04x:  %08x", ring,
201				(uint32_t)(ring->cur - ring->last_start), data);
202	}
203	util_dynarray_append(buf, struct fd_cs_patch, ((struct fd_cs_patch){
204		.cs  = ring->cur++,
205		.val = data,
206	}));
207}
208
209/*
210 * NOTE: OUT_RELOC*() is 2 dwords (64b) on a5xx+
211 */
212
213static inline void
214OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
215		uint32_t offset, uint64_t or, int32_t shift)
216{
217	if (LOG_DWORDS) {
218		DBG("ring[%p]: OUT_RELOC   %04x:  %p+%u << %d", ring,
219				(uint32_t)(ring->cur - ring->last_start), bo, offset, shift);
220	}
221	debug_assert(offset < fd_bo_size(bo));
222	fd_ringbuffer_reloc2(ring, &(struct fd_reloc){
223		.bo = bo,
224		.flags = FD_RELOC_READ,
225		.offset = offset,
226		.or = or,
227		.shift = shift,
228		.orhi = or >> 32,
229	});
230}
231
232static inline void
233OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
234		uint32_t offset, uint64_t or, int32_t shift)
235{
236	if (LOG_DWORDS) {
237		DBG("ring[%p]: OUT_RELOCW  %04x:  %p+%u << %d", ring,
238				(uint32_t)(ring->cur - ring->last_start), bo, offset, shift);
239	}
240	debug_assert(offset < fd_bo_size(bo));
241	fd_ringbuffer_reloc2(ring, &(struct fd_reloc){
242		.bo = bo,
243		.flags = FD_RELOC_READ | FD_RELOC_WRITE,
244		.offset = offset,
245		.or = or,
246		.shift = shift,
247		.orhi = or >> 32,
248	});
249}
250
251static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
252{
253	if (ring->cur + ndwords >= ring->end)
254		fd_ringbuffer_grow(ring, ndwords);
255}
256
257static inline uint32_t
258__gpu_id(struct fd_ringbuffer *ring)
259{
260	uint64_t val;
261	fd_pipe_get_param(ring->pipe, FD_GPU_ID, &val);
262	return val;
263}
264
265static inline void
266OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
267{
268	debug_assert(__gpu_id(ring) < 500);
269	BEGIN_RING(ring, cnt+1);
270	OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF));
271}
272
273static inline void
274OUT_PKT2(struct fd_ringbuffer *ring)
275{
276	debug_assert(__gpu_id(ring) < 500);
277	BEGIN_RING(ring, 1);
278	OUT_RING(ring, CP_TYPE2_PKT);
279}
280
281static inline void
282OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
283{
284	debug_assert(__gpu_id(ring) < 500);
285	BEGIN_RING(ring, cnt+1);
286	OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8));
287}
288
/*
 * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
 */

/*
 * Returns the odd-parity bit for 'val': 1 when 'val' has an even number of
 * set bits and 0 when it has an odd number, so that 'val' together with
 * the returned bit always has an odd number of set bits.
 */
static inline unsigned
_odd_parity_bit(unsigned val)
{
	/* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
	 *
	 * The XOR cascade folds the parity of the whole word into the low
	 * nibble, which then indexes a 16-entry lookup table packed into a
	 * constant.  0x6996 is the even-parity table; we want odd parity, so
	 * the table is inverted.  The inversion is spelled out as the unsigned
	 * 16-bit constant 0x9669 rather than written as ~0x6996: the latter is
	 * a negative int, and right-shifting a negative value is
	 * implementation-defined.
	 */
	val ^= val >> 16;
	val ^= val >> 8;
	val ^= val >> 4;
	val &= 0xf;
	return (0x9669u >> val) & 1u;
}
305
306static inline void
307OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
308{
309	BEGIN_RING(ring, cnt+1);
310	OUT_RING(ring, CP_TYPE4_PKT | cnt |
311			(_odd_parity_bit(cnt) << 7) |
312			((regindx & 0x3ffff) << 8) |
313			((_odd_parity_bit(regindx) << 27)));
314}
315
316static inline void
317OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
318{
319	BEGIN_RING(ring, cnt+1);
320	OUT_RING(ring, CP_TYPE7_PKT | cnt |
321			(_odd_parity_bit(cnt) << 15) |
322			((opcode & 0x7f) << 16) |
323			((_odd_parity_bit(opcode) << 23)));
324}
325
326static inline void
327OUT_WFI(struct fd_ringbuffer *ring)
328{
329	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
330	OUT_RING(ring, 0x00000000);
331}
332
/* pkt7 flavour of OUT_WFI(): emits a CP_WAIT_FOR_IDLE type-7 packet with
 * a count of zero, i.e. the header dword only.
 */
static inline void
OUT_WFI5(struct fd_ringbuffer *ring)
{
	OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
}
338
339static inline void
340__OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target)
341{
342	unsigned count = fd_ringbuffer_cmd_count(target);
343
344	debug_assert(__gpu_id(ring) < 500);
345
346	/* for debug after a lock up, write a unique counter value
347	 * to scratch6 for each IB, to make it easier to match up
348	 * register dumps to cmdstream.  The combination of IB and
349	 * DRAW (scratch7) is enough to "triangulate" the particular
350	 * draw that caused lockup.
351	 */
352	emit_marker(ring, 6);
353
354	for (unsigned i = 0; i < count; i++) {
355		uint32_t dwords;
356		OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
357		dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
358		assert(dwords > 0);
359		OUT_RING(ring, dwords);
360		OUT_PKT2(ring);
361	}
362
363	emit_marker(ring, 6);
364}
365
366static inline void
367__OUT_IB5(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
368{
369	unsigned count = fd_ringbuffer_cmd_count(target);
370
371	/* for debug after a lock up, write a unique counter value
372	 * to scratch6 for each IB, to make it easier to match up
373	 * register dumps to cmdstream.  The combination of IB and
374	 * DRAW (scratch7) is enough to "triangulate" the particular
375	 * draw that caused lockup.
376	 */
377	emit_marker5(ring, 6);
378
379	for (unsigned i = 0; i < count; i++) {
380		uint32_t dwords;
381		OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
382		dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
383		assert(dwords > 0);
384		OUT_RING(ring, dwords);
385	}
386
387	emit_marker5(ring, 6);
388}
389
/* CP_SCRATCH_REG4 is used to hold base address for query results.
 *
 * XXX: annoyingly the scratch regs move on a5xx, and additionally use
 * different packet types, so freedreno_query_hw is going to need a bit
 * of rework.
 */
#define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4
395
396static inline void
397emit_marker(struct fd_ringbuffer *ring, int scratch_idx)
398{
399	extern unsigned marker_cnt;
400	unsigned reg = REG_AXXX_CP_SCRATCH_REG0 + scratch_idx;
401	assert(reg != HW_QUERY_BASE_REG);
402	if (reg == HW_QUERY_BASE_REG)
403		return;
404	OUT_PKT0(ring, reg, 1);
405	OUT_RING(ring, ++marker_cnt);
406}
407
408static inline void
409emit_marker5(struct fd_ringbuffer *ring, int scratch_idx)
410{
411	extern unsigned marker_cnt;
412//XXX	unsigned reg = REG_A5XX_CP_SCRATCH_REG(scratch_idx);
413	unsigned reg = 0x00000b78 + scratch_idx;
414	assert(reg != HW_QUERY_BASE_REG);
415	if (reg == HW_QUERY_BASE_REG)
416		return;
417	OUT_WFI5(ring);
418	OUT_PKT4(ring, reg, 1);
419	OUT_RING(ring, ++marker_cnt);
420	OUT_WFI5(ring);
421}
422
/* helper to get numeric value from environment variable..  mostly
 * just leaving this here because it is helpful to brute-force figure
 * out unknown formats, etc, which blob driver does not support.
 *
 * The value is parsed by strtoul() with base 0, so decimal, 0x-prefixed
 * hex and 0-prefixed octal are all accepted; an unset variable yields 0.
 */
static inline uint32_t env2u(const char *envvar)
{
	const char *text = getenv(envvar);

	return text ? strtoul(text, NULL, 0) : 0;
}
434
435static inline uint32_t
436pack_rgba(enum pipe_format format, const float *rgba)
437{
438	union util_color uc;
439	util_pack_color(rgba, format, &uc);
440	return uc.ui[0];
441}
442
/*
 * swap - swap value of @a and @b
 *
 * Both arguments must be lvalues; each is evaluated twice.  The temporary
 * takes its type from @a (compiler typeof extension).
 */
#define swap(a, b) \
	do { \
		__typeof(a) swap_tmp_ = (a); \
		(a) = (b); \
		(b) = swap_tmp_; \
	} while (0)
448
/*
 * foreach_bit - loop header that runs its body once per u_bit_scan() result
 *
 * A private copy of @mask (_m) is handed to u_bit_scan() on every
 * iteration and the result is stored in @b; the loop stops once _m is zero.
 * NOTE(review): presumably u_bit_scan() returns the index of the lowest set
 * bit and clears it from _m -- confirm in util/u_math.h.
 *
 * Relies on a GNU statement expression in the loop condition.
 */
#define foreach_bit(b, mask) \
	for (uint32_t _m = (mask); _m && ({(b) = u_bit_scan(&_m); 1;});)
451
452#endif /* FREEDRENO_UTIL_H_ */
453