1/* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2/*
3 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 *
25 * Authors:
26 *    Gareth Hughes <gareth@valinux.com>
27 *    Kevin E. Martin <martin@valinux.com>
28 */
29
30#include "drmP.h"
31#include "drm.h"
32#include "drm_buffer.h"
33#include "drm_sarea.h"
34#include "radeon_drm.h"
35#include "radeon_drv.h"
36
37/* ================================================================
38 * Helper functions for client state checking and fixup
39 */
40
41static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
42						    dev_priv,
43						    struct drm_file * file_priv,
44						    u32 *offset)
45{
46	u64 off = *offset;
47	u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
48	struct drm_radeon_driver_file_fields *radeon_priv;
49
50	/* Hrm ... the story of the offset ... So this function converts
51	 * the various ideas of what userland clients might have for an
52	 * offset in the card address space into an offset into the card
53	 * address space :) So with a sane client, it should just keep
54	 * the value intact and just do some boundary checking. However,
55	 * not all clients are sane. Some older clients pass us 0 based
56	 * offsets relative to the start of the framebuffer and some may
57	 * assume the AGP aperture it appended to the framebuffer, so we
58	 * try to detect those cases and fix them up.
59	 *
60	 * Note: It might be a good idea here to make sure the offset lands
61	 * in some "allowed" area to protect things like the PCIE GART...
62	 */
63
64	/* First, the best case, the offset already lands in either the
65	 * framebuffer or the GART mapped space
66	 */
67	if (radeon_check_offset(dev_priv, off))
68		return 0;
69
70	/* Ok, that didn't happen... now check if we have a zero based
71	 * offset that fits in the framebuffer + gart space, apply the
72	 * magic offset we get from SETPARAM or calculated from fb_location
73	 */
74	if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
75		radeon_priv = file_priv->driver_priv;
76		off += radeon_priv->radeon_fb_delta;
77	}
78
79	/* Finally, assume we aimed at a GART offset if beyond the fb */
80	if (off > fb_end)
81		off = off - fb_end - 1 + dev_priv->gart_vm_start;
82
83	/* Now recheck and fail if out of bounds */
84	if (radeon_check_offset(dev_priv, off)) {
85		DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
86		*offset = off;
87		return 0;
88	}
89	return -EINVAL;
90}
91
92static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
93						     dev_priv,
94						     struct drm_file *file_priv,
95						     int id, struct drm_buffer *buf)
96{
97	u32 *data;
98	switch (id) {
99
100	case RADEON_EMIT_PP_MISC:
101		data = drm_buffer_pointer_to_dword(buf,
102			(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4);
103
104		if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
105			DRM_ERROR("Invalid depth buffer offset\n");
106			return -EINVAL;
107		}
108		dev_priv->have_z_offset = 1;
109		break;
110
111	case RADEON_EMIT_PP_CNTL:
112		data = drm_buffer_pointer_to_dword(buf,
113			(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4);
114
115		if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
116			DRM_ERROR("Invalid colour buffer offset\n");
117			return -EINVAL;
118		}
119		break;
120
121	case R200_EMIT_PP_TXOFFSET_0:
122	case R200_EMIT_PP_TXOFFSET_1:
123	case R200_EMIT_PP_TXOFFSET_2:
124	case R200_EMIT_PP_TXOFFSET_3:
125	case R200_EMIT_PP_TXOFFSET_4:
126	case R200_EMIT_PP_TXOFFSET_5:
127		data = drm_buffer_pointer_to_dword(buf, 0);
128		if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
129			DRM_ERROR("Invalid R200 texture offset\n");
130			return -EINVAL;
131		}
132		break;
133
134	case RADEON_EMIT_PP_TXFILTER_0:
135	case RADEON_EMIT_PP_TXFILTER_1:
136	case RADEON_EMIT_PP_TXFILTER_2:
137		data = drm_buffer_pointer_to_dword(buf,
138			(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4);
139		if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
140			DRM_ERROR("Invalid R100 texture offset\n");
141			return -EINVAL;
142		}
143		break;
144
145	case R200_EMIT_PP_CUBIC_OFFSETS_0:
146	case R200_EMIT_PP_CUBIC_OFFSETS_1:
147	case R200_EMIT_PP_CUBIC_OFFSETS_2:
148	case R200_EMIT_PP_CUBIC_OFFSETS_3:
149	case R200_EMIT_PP_CUBIC_OFFSETS_4:
150	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
151			int i;
152			for (i = 0; i < 5; i++) {
153				data = drm_buffer_pointer_to_dword(buf, i);
154				if (radeon_check_and_fixup_offset(dev_priv,
155								  file_priv,
156								  data)) {
157					DRM_ERROR
158					    ("Invalid R200 cubic texture offset\n");
159					return -EINVAL;
160				}
161			}
162			break;
163		}
164
165	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
166	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
167	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
168			int i;
169			for (i = 0; i < 5; i++) {
170				data = drm_buffer_pointer_to_dword(buf, i);
171				if (radeon_check_and_fixup_offset(dev_priv,
172								  file_priv,
173								  data)) {
174					DRM_ERROR
175					    ("Invalid R100 cubic texture offset\n");
176					return -EINVAL;
177				}
178			}
179		}
180		break;
181
182	case R200_EMIT_VAP_CTL:{
183			RING_LOCALS;
184			BEGIN_RING(2);
185			OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
186			ADVANCE_RING();
187		}
188		break;
189
190	case RADEON_EMIT_RB3D_COLORPITCH:
191	case RADEON_EMIT_RE_LINE_PATTERN:
192	case RADEON_EMIT_SE_LINE_WIDTH:
193	case RADEON_EMIT_PP_LUM_MATRIX:
194	case RADEON_EMIT_PP_ROT_MATRIX_0:
195	case RADEON_EMIT_RB3D_STENCILREFMASK:
196	case RADEON_EMIT_SE_VPORT_XSCALE:
197	case RADEON_EMIT_SE_CNTL:
198	case RADEON_EMIT_SE_CNTL_STATUS:
199	case RADEON_EMIT_RE_MISC:
200	case RADEON_EMIT_PP_BORDER_COLOR_0:
201	case RADEON_EMIT_PP_BORDER_COLOR_1:
202	case RADEON_EMIT_PP_BORDER_COLOR_2:
203	case RADEON_EMIT_SE_ZBIAS_FACTOR:
204	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
205	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
206	case R200_EMIT_PP_TXCBLEND_0:
207	case R200_EMIT_PP_TXCBLEND_1:
208	case R200_EMIT_PP_TXCBLEND_2:
209	case R200_EMIT_PP_TXCBLEND_3:
210	case R200_EMIT_PP_TXCBLEND_4:
211	case R200_EMIT_PP_TXCBLEND_5:
212	case R200_EMIT_PP_TXCBLEND_6:
213	case R200_EMIT_PP_TXCBLEND_7:
214	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
215	case R200_EMIT_TFACTOR_0:
216	case R200_EMIT_VTX_FMT_0:
217	case R200_EMIT_MATRIX_SELECT_0:
218	case R200_EMIT_TEX_PROC_CTL_2:
219	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
220	case R200_EMIT_PP_TXFILTER_0:
221	case R200_EMIT_PP_TXFILTER_1:
222	case R200_EMIT_PP_TXFILTER_2:
223	case R200_EMIT_PP_TXFILTER_3:
224	case R200_EMIT_PP_TXFILTER_4:
225	case R200_EMIT_PP_TXFILTER_5:
226	case R200_EMIT_VTE_CNTL:
227	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
228	case R200_EMIT_PP_TAM_DEBUG3:
229	case R200_EMIT_PP_CNTL_X:
230	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
231	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
232	case R200_EMIT_RE_SCISSOR_TL_0:
233	case R200_EMIT_RE_SCISSOR_TL_1:
234	case R200_EMIT_RE_SCISSOR_TL_2:
235	case R200_EMIT_SE_VAP_CNTL_STATUS:
236	case R200_EMIT_SE_VTX_STATE_CNTL:
237	case R200_EMIT_RE_POINTSIZE:
238	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
239	case R200_EMIT_PP_CUBIC_FACES_0:
240	case R200_EMIT_PP_CUBIC_FACES_1:
241	case R200_EMIT_PP_CUBIC_FACES_2:
242	case R200_EMIT_PP_CUBIC_FACES_3:
243	case R200_EMIT_PP_CUBIC_FACES_4:
244	case R200_EMIT_PP_CUBIC_FACES_5:
245	case RADEON_EMIT_PP_TEX_SIZE_0:
246	case RADEON_EMIT_PP_TEX_SIZE_1:
247	case RADEON_EMIT_PP_TEX_SIZE_2:
248	case R200_EMIT_RB3D_BLENDCOLOR:
249	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
250	case RADEON_EMIT_PP_CUBIC_FACES_0:
251	case RADEON_EMIT_PP_CUBIC_FACES_1:
252	case RADEON_EMIT_PP_CUBIC_FACES_2:
253	case R200_EMIT_PP_TRI_PERF_CNTL:
254	case R200_EMIT_PP_AFS_0:
255	case R200_EMIT_PP_AFS_1:
256	case R200_EMIT_ATF_TFACTOR:
257	case R200_EMIT_PP_TXCTLALL_0:
258	case R200_EMIT_PP_TXCTLALL_1:
259	case R200_EMIT_PP_TXCTLALL_2:
260	case R200_EMIT_PP_TXCTLALL_3:
261	case R200_EMIT_PP_TXCTLALL_4:
262	case R200_EMIT_PP_TXCTLALL_5:
263	case R200_EMIT_VAP_PVS_CNTL:
264		/* These packets don't contain memory offsets */
265		break;
266
267	default:
268		DRM_ERROR("Unknown state packet ID %d\n", id);
269		return -EINVAL;
270	}
271
272	return 0;
273}
274
275static int radeon_check_and_fixup_packet3(drm_radeon_private_t *
276					  dev_priv,
277					  struct drm_file *file_priv,
278					  drm_radeon_kcmd_buffer_t *
279					  cmdbuf,
280					  unsigned int *cmdsz)
281{
282	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
283	u32 offset, narrays;
284	int count, i, k;
285
286	count = ((*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16);
287	*cmdsz = 2 + count;
288
289	if ((*cmd & 0xc0000000) != RADEON_CP_PACKET3) {
290		DRM_ERROR("Not a type 3 packet\n");
291		return -EINVAL;
292	}
293
294	if (4 * *cmdsz > drm_buffer_unprocessed(cmdbuf->buffer)) {
295		DRM_ERROR("Packet size larger than size of data provided\n");
296		return -EINVAL;
297	}
298
299	switch (*cmd & 0xff00) {
300	/* XXX Are there old drivers needing other packets? */
301
302	case RADEON_3D_DRAW_IMMD:
303	case RADEON_3D_DRAW_VBUF:
304	case RADEON_3D_DRAW_INDX:
305	case RADEON_WAIT_FOR_IDLE:
306	case RADEON_CP_NOP:
307	case RADEON_3D_CLEAR_ZMASK:
308/*	case RADEON_CP_NEXT_CHAR:
309	case RADEON_CP_PLY_NEXTSCAN:
310	case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
311		/* these packets are safe */
312		break;
313
314	case RADEON_CP_3D_DRAW_IMMD_2:
315	case RADEON_CP_3D_DRAW_VBUF_2:
316	case RADEON_CP_3D_DRAW_INDX_2:
317	case RADEON_3D_CLEAR_HIZ:
318		/* safe but r200 only */
319		if (dev_priv->microcode_version != UCODE_R200) {
320			DRM_ERROR("Invalid 3d packet for r100-class chip\n");
321			return -EINVAL;
322		}
323		break;
324
325	case RADEON_3D_LOAD_VBPNTR:
326
327		if (count > 18) { /* 12 arrays max */
328			DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
329				  count);
330			return -EINVAL;
331		}
332
333		/* carefully check packet contents */
334		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
335
336		narrays = *cmd & ~0xc000;
337		k = 0;
338		i = 2;
339		while ((k < narrays) && (i < (count + 2))) {
340			i++;		/* skip attribute field */
341			cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
342			if (radeon_check_and_fixup_offset(dev_priv, file_priv,
343							  cmd)) {
344				DRM_ERROR
345				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
346				     k, i);
347				return -EINVAL;
348			}
349			k++;
350			i++;
351			if (k == narrays)
352				break;
353			/* have one more to process, they come in pairs */
354			cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
355
356			if (radeon_check_and_fixup_offset(dev_priv,
357							  file_priv, cmd))
358			{
359				DRM_ERROR
360				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
361				     k, i);
362				return -EINVAL;
363			}
364			k++;
365			i++;
366		}
367		/* do the counts match what we expect ? */
368		if ((k != narrays) || (i != (count + 2))) {
369			DRM_ERROR
370			    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
371			      k, i, narrays, count + 1);
372			return -EINVAL;
373		}
374		break;
375
376	case RADEON_3D_RNDR_GEN_INDX_PRIM:
377		if (dev_priv->microcode_version != UCODE_R100) {
378			DRM_ERROR("Invalid 3d packet for r200-class chip\n");
379			return -EINVAL;
380		}
381
382		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
383		if (radeon_check_and_fixup_offset(dev_priv, file_priv, cmd)) {
384				DRM_ERROR("Invalid rndr_gen_indx offset\n");
385				return -EINVAL;
386		}
387		break;
388
389	case RADEON_CP_INDX_BUFFER:
390		if (dev_priv->microcode_version != UCODE_R200) {
391			DRM_ERROR("Invalid 3d packet for r100-class chip\n");
392			return -EINVAL;
393		}
394
395		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
396		if ((*cmd & 0x8000ffff) != 0x80000810) {
397			DRM_ERROR("Invalid indx_buffer reg address %08X\n", *cmd);
398			return -EINVAL;
399		}
400		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
401		if (radeon_check_and_fixup_offset(dev_priv, file_priv, cmd)) {
402			DRM_ERROR("Invalid indx_buffer offset is %08X\n", *cmd);
403			return -EINVAL;
404		}
405		break;
406
407	case RADEON_CNTL_HOSTDATA_BLT:
408	case RADEON_CNTL_PAINT_MULTI:
409	case RADEON_CNTL_BITBLT_MULTI:
410		/* MSB of opcode: next DWORD GUI_CNTL */
411		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
412		if (*cmd & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
413			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
414			u32 *cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
415			offset = *cmd2 << 10;
416			if (radeon_check_and_fixup_offset
417			    (dev_priv, file_priv, &offset)) {
418				DRM_ERROR("Invalid first packet offset\n");
419				return -EINVAL;
420			}
421			*cmd2 = (*cmd2 & 0xffc00000) | offset >> 10;
422		}
423
424		if ((*cmd & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
425		    (*cmd & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
426			u32 *cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
427			offset = *cmd3 << 10;
428			if (radeon_check_and_fixup_offset
429			    (dev_priv, file_priv, &offset)) {
430				DRM_ERROR("Invalid second packet offset\n");
431				return -EINVAL;
432			}
433			*cmd3 = (*cmd3 & 0xffc00000) | offset >> 10;
434		}
435		break;
436
437	default:
438		DRM_ERROR("Invalid packet type %x\n", *cmd & 0xff00);
439		return -EINVAL;
440	}
441
442	return 0;
443}
444
445/* ================================================================
446 * CP hardware state programming functions
447 */
448
449static void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
450				  struct drm_clip_rect * box)
451{
452	RING_LOCALS;
453
454	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
455		  box->x1, box->y1, box->x2, box->y2);
456
457	BEGIN_RING(4);
458	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
459	OUT_RING((box->y1 << 16) | box->x1);
460	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
461	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
462	ADVANCE_RING();
463}
464
465/* Emit 1.1 state
466 */
467static int radeon_emit_state(drm_radeon_private_t * dev_priv,
468			     struct drm_file *file_priv,
469			     drm_radeon_context_regs_t * ctx,
470			     drm_radeon_texture_regs_t * tex,
471			     unsigned int dirty)
472{
473	RING_LOCALS;
474	DRM_DEBUG("dirty=0x%08x\n", dirty);
475
476	if (dirty & RADEON_UPLOAD_CONTEXT) {
477		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
478						  &ctx->rb3d_depthoffset)) {
479			DRM_ERROR("Invalid depth buffer offset\n");
480			return -EINVAL;
481		}
482
483		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
484						  &ctx->rb3d_coloroffset)) {
485			DRM_ERROR("Invalid depth buffer offset\n");
486			return -EINVAL;
487		}
488
489		BEGIN_RING(14);
490		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
491		OUT_RING(ctx->pp_misc);
492		OUT_RING(ctx->pp_fog_color);
493		OUT_RING(ctx->re_solid_color);
494		OUT_RING(ctx->rb3d_blendcntl);
495		OUT_RING(ctx->rb3d_depthoffset);
496		OUT_RING(ctx->rb3d_depthpitch);
497		OUT_RING(ctx->rb3d_zstencilcntl);
498		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
499		OUT_RING(ctx->pp_cntl);
500		OUT_RING(ctx->rb3d_cntl);
501		OUT_RING(ctx->rb3d_coloroffset);
502		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
503		OUT_RING(ctx->rb3d_colorpitch);
504		ADVANCE_RING();
505	}
506
507	if (dirty & RADEON_UPLOAD_VERTFMT) {
508		BEGIN_RING(2);
509		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
510		OUT_RING(ctx->se_coord_fmt);
511		ADVANCE_RING();
512	}
513
514	if (dirty & RADEON_UPLOAD_LINE) {
515		BEGIN_RING(5);
516		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
517		OUT_RING(ctx->re_line_pattern);
518		OUT_RING(ctx->re_line_state);
519		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
520		OUT_RING(ctx->se_line_width);
521		ADVANCE_RING();
522	}
523
524	if (dirty & RADEON_UPLOAD_BUMPMAP) {
525		BEGIN_RING(5);
526		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
527		OUT_RING(ctx->pp_lum_matrix);
528		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
529		OUT_RING(ctx->pp_rot_matrix_0);
530		OUT_RING(ctx->pp_rot_matrix_1);
531		ADVANCE_RING();
532	}
533
534	if (dirty & RADEON_UPLOAD_MASKS) {
535		BEGIN_RING(4);
536		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
537		OUT_RING(ctx->rb3d_stencilrefmask);
538		OUT_RING(ctx->rb3d_ropcntl);
539		OUT_RING(ctx->rb3d_planemask);
540		ADVANCE_RING();
541	}
542
543	if (dirty & RADEON_UPLOAD_VIEWPORT) {
544		BEGIN_RING(7);
545		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
546		OUT_RING(ctx->se_vport_xscale);
547		OUT_RING(ctx->se_vport_xoffset);
548		OUT_RING(ctx->se_vport_yscale);
549		OUT_RING(ctx->se_vport_yoffset);
550		OUT_RING(ctx->se_vport_zscale);
551		OUT_RING(ctx->se_vport_zoffset);
552		ADVANCE_RING();
553	}
554
555	if (dirty & RADEON_UPLOAD_SETUP) {
556		BEGIN_RING(4);
557		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
558		OUT_RING(ctx->se_cntl);
559		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
560		OUT_RING(ctx->se_cntl_status);
561		ADVANCE_RING();
562	}
563
564	if (dirty & RADEON_UPLOAD_MISC) {
565		BEGIN_RING(2);
566		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
567		OUT_RING(ctx->re_misc);
568		ADVANCE_RING();
569	}
570
571	if (dirty & RADEON_UPLOAD_TEX0) {
572		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
573						  &tex[0].pp_txoffset)) {
574			DRM_ERROR("Invalid texture offset for unit 0\n");
575			return -EINVAL;
576		}
577
578		BEGIN_RING(9);
579		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
580		OUT_RING(tex[0].pp_txfilter);
581		OUT_RING(tex[0].pp_txformat);
582		OUT_RING(tex[0].pp_txoffset);
583		OUT_RING(tex[0].pp_txcblend);
584		OUT_RING(tex[0].pp_txablend);
585		OUT_RING(tex[0].pp_tfactor);
586		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
587		OUT_RING(tex[0].pp_border_color);
588		ADVANCE_RING();
589	}
590
591	if (dirty & RADEON_UPLOAD_TEX1) {
592		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
593						  &tex[1].pp_txoffset)) {
594			DRM_ERROR("Invalid texture offset for unit 1\n");
595			return -EINVAL;
596		}
597
598		BEGIN_RING(9);
599		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
600		OUT_RING(tex[1].pp_txfilter);
601		OUT_RING(tex[1].pp_txformat);
602		OUT_RING(tex[1].pp_txoffset);
603		OUT_RING(tex[1].pp_txcblend);
604		OUT_RING(tex[1].pp_txablend);
605		OUT_RING(tex[1].pp_tfactor);
606		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
607		OUT_RING(tex[1].pp_border_color);
608		ADVANCE_RING();
609	}
610
611	if (dirty & RADEON_UPLOAD_TEX2) {
612		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
613						  &tex[2].pp_txoffset)) {
614			DRM_ERROR("Invalid texture offset for unit 2\n");
615			return -EINVAL;
616		}
617
618		BEGIN_RING(9);
619		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
620		OUT_RING(tex[2].pp_txfilter);
621		OUT_RING(tex[2].pp_txformat);
622		OUT_RING(tex[2].pp_txoffset);
623		OUT_RING(tex[2].pp_txcblend);
624		OUT_RING(tex[2].pp_txablend);
625		OUT_RING(tex[2].pp_tfactor);
626		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
627		OUT_RING(tex[2].pp_border_color);
628		ADVANCE_RING();
629	}
630
631	return 0;
632}
633
634/* Emit 1.2 state
635 */
636static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
637			      struct drm_file *file_priv,
638			      drm_radeon_state_t * state)
639{
640	RING_LOCALS;
641
642	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
643		BEGIN_RING(3);
644		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
645		OUT_RING(state->context2.se_zbias_factor);
646		OUT_RING(state->context2.se_zbias_constant);
647		ADVANCE_RING();
648	}
649
650	return radeon_emit_state(dev_priv, file_priv, &state->context,
651				 state->tex, state->dirty);
652}
653
654/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
655 * 1.3 cmdbuffers allow all previous state to be updated as well as
656 * the tcl scalar and vector areas.
657 */
658static struct {
659	int start;
660	int len;
661	const char *name;
662} packet[RADEON_MAX_STATE_PACKETS] = {
663	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
664	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
665	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
666	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
667	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
668	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
669	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
670	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
671	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
672	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
673	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
674	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
675	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
676	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
677	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
678	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
679	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
680	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
681	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
682	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
683	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
684		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
685	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
686	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
687	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
688	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
689	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
690	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
691	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
692	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
693	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
694	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
695	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
696	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
697	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
698	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
699	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
700	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
701	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
702	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
703	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
704	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
705	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
706	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
707	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
708	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
709	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
710	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
711	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
712	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
713	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
714	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
715	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
716	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
717	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
718	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
719	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
720	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
721	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
722	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
723	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
724	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
725	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
726		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
727	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
728	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
729	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
730	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
731	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
732	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
733	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
734	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
735	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
736	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
737	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
738	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
739	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
740	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
741	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
742	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
743	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
744	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
745	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
746	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
747	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
748	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
749	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
750	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
751	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
752	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
753	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
754	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
755	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
756	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
757	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
758	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
759	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
760	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
761};
762
763/* ================================================================
764 * Performance monitoring functions
765 */
766
767static void radeon_clear_box(drm_radeon_private_t * dev_priv,
768			     struct drm_radeon_master_private *master_priv,
769			     int x, int y, int w, int h, int r, int g, int b)
770{
771	u32 color;
772	RING_LOCALS;
773
774	x += master_priv->sarea_priv->boxes[0].x1;
775	y += master_priv->sarea_priv->boxes[0].y1;
776
777	switch (dev_priv->color_fmt) {
778	case RADEON_COLOR_FORMAT_RGB565:
779		color = (((r & 0xf8) << 8) |
780			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
781		break;
782	case RADEON_COLOR_FORMAT_ARGB8888:
783	default:
784		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
785		break;
786	}
787
788	BEGIN_RING(4);
789	RADEON_WAIT_UNTIL_3D_IDLE();
790	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
791	OUT_RING(0xffffffff);
792	ADVANCE_RING();
793
794	BEGIN_RING(6);
795
796	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
797	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
798		 RADEON_GMC_BRUSH_SOLID_COLOR |
799		 (dev_priv->color_fmt << 8) |
800		 RADEON_GMC_SRC_DATATYPE_COLOR |
801		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
802
803	if (master_priv->sarea_priv->pfCurrentPage == 1) {
804		OUT_RING(dev_priv->front_pitch_offset);
805	} else {
806		OUT_RING(dev_priv->back_pitch_offset);
807	}
808
809	OUT_RING(color);
810
811	OUT_RING((x << 16) | y);
812	OUT_RING((w << 16) | h);
813
814	ADVANCE_RING();
815}
816
817static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv, struct drm_radeon_master_private *master_priv)
818{
819	/* Collapse various things into a wait flag -- trying to
820	 * guess if userspase slept -- better just to have them tell us.
821	 */
822	if (dev_priv->stats.last_frame_reads > 1 ||
823	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
824		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
825	}
826
827	if (dev_priv->stats.freelist_loops) {
828		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
829	}
830
831	/* Purple box for page flipping
832	 */
833	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
834		radeon_clear_box(dev_priv, master_priv, 4, 4, 8, 8, 255, 0, 255);
835
836	/* Red box if we have to wait for idle at any point
837	 */
838	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
839		radeon_clear_box(dev_priv, master_priv, 16, 4, 8, 8, 255, 0, 0);
840
841	/* Blue box: lost context?
842	 */
843
844	/* Yellow box for texture swaps
845	 */
846	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
847		radeon_clear_box(dev_priv, master_priv, 40, 4, 8, 8, 255, 255, 0);
848
849	/* Green box if hardware never idles (as far as we can tell)
850	 */
851	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
852		radeon_clear_box(dev_priv, master_priv, 64, 4, 8, 8, 0, 255, 0);
853
854	/* Draw bars indicating number of buffers allocated
855	 * (not a great measure, easily confused)
856	 */
857	if (dev_priv->stats.requested_bufs) {
858		if (dev_priv->stats.requested_bufs > 100)
859			dev_priv->stats.requested_bufs = 100;
860
861		radeon_clear_box(dev_priv, master_priv, 4, 16,
862				 dev_priv->stats.requested_bufs, 4,
863				 196, 128, 128);
864	}
865
866	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
867
868}
869
870/* ================================================================
871 * CP command dispatch functions
872 */
873
874static void radeon_cp_dispatch_clear(struct drm_device * dev,
875				     struct drm_master *master,
876				     drm_radeon_clear_t * clear,
877				     drm_radeon_clear_rect_t * depth_boxes)
878{
879	drm_radeon_private_t *dev_priv = dev->dev_private;
880	struct drm_radeon_master_private *master_priv = master->driver_priv;
881	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
882	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
883	int nbox = sarea_priv->nbox;
884	struct drm_clip_rect *pbox = sarea_priv->boxes;
885	unsigned int flags = clear->flags;
886	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
887	int i;
888	RING_LOCALS;
889	DRM_DEBUG("flags = 0x%x\n", flags);
890
891	dev_priv->stats.clears++;
892
893	if (sarea_priv->pfCurrentPage == 1) {
894		unsigned int tmp = flags;
895
896		flags &= ~(RADEON_FRONT | RADEON_BACK);
897		if (tmp & RADEON_FRONT)
898			flags |= RADEON_BACK;
899		if (tmp & RADEON_BACK)
900			flags |= RADEON_FRONT;
901	}
902	if (flags & (RADEON_DEPTH|RADEON_STENCIL)) {
903		if (!dev_priv->have_z_offset) {
904			printk_once(KERN_ERR "radeon: illegal depth clear request. Buggy mesa detected - please update.\n");
905			flags &= ~(RADEON_DEPTH | RADEON_STENCIL);
906		}
907	}
908
909	if (flags & (RADEON_FRONT | RADEON_BACK)) {
910
911		BEGIN_RING(4);
912
913		/* Ensure the 3D stream is idle before doing a
914		 * 2D fill to clear the front or back buffer.
915		 */
916		RADEON_WAIT_UNTIL_3D_IDLE();
917
918		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
919		OUT_RING(clear->color_mask);
920
921		ADVANCE_RING();
922
923		/* Make sure we restore the 3D state next time.
924		 */
925		sarea_priv->ctx_owner = 0;
926
927		for (i = 0; i < nbox; i++) {
928			int x = pbox[i].x1;
929			int y = pbox[i].y1;
930			int w = pbox[i].x2 - x;
931			int h = pbox[i].y2 - y;
932
933			DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
934				  x, y, w, h, flags);
935
936			if (flags & RADEON_FRONT) {
937				BEGIN_RING(6);
938
939				OUT_RING(CP_PACKET3
940					 (RADEON_CNTL_PAINT_MULTI, 4));
941				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
942					 RADEON_GMC_BRUSH_SOLID_COLOR |
943					 (dev_priv->
944					  color_fmt << 8) |
945					 RADEON_GMC_SRC_DATATYPE_COLOR |
946					 RADEON_ROP3_P |
947					 RADEON_GMC_CLR_CMP_CNTL_DIS);
948
949				OUT_RING(dev_priv->front_pitch_offset);
950				OUT_RING(clear->clear_color);
951
952				OUT_RING((x << 16) | y);
953				OUT_RING((w << 16) | h);
954
955				ADVANCE_RING();
956			}
957
958			if (flags & RADEON_BACK) {
959				BEGIN_RING(6);
960
961				OUT_RING(CP_PACKET3
962					 (RADEON_CNTL_PAINT_MULTI, 4));
963				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
964					 RADEON_GMC_BRUSH_SOLID_COLOR |
965					 (dev_priv->
966					  color_fmt << 8) |
967					 RADEON_GMC_SRC_DATATYPE_COLOR |
968					 RADEON_ROP3_P |
969					 RADEON_GMC_CLR_CMP_CNTL_DIS);
970
971				OUT_RING(dev_priv->back_pitch_offset);
972				OUT_RING(clear->clear_color);
973
974				OUT_RING((x << 16) | y);
975				OUT_RING((w << 16) | h);
976
977				ADVANCE_RING();
978			}
979		}
980	}
981
982	/* hyper z clear */
983	/* no docs available, based on reverse engineering by Stephane Marchesin */
984	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
985	    && (flags & RADEON_CLEAR_FASTZ)) {
986
987		int i;
988		int depthpixperline =
989		    dev_priv->depth_fmt ==
990		    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
991						       2) : (dev_priv->
992							     depth_pitch / 4);
993
994		u32 clearmask;
995
996		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
997		    ((clear->depth_mask & 0xff) << 24);
998
999		/* Make sure we restore the 3D state next time.
1000		 * we haven't touched any "normal" state - still need this?
1001		 */
1002		sarea_priv->ctx_owner = 0;
1003
1004		if ((dev_priv->flags & RADEON_HAS_HIERZ)
1005		    && (flags & RADEON_USE_HIERZ)) {
1006			/* FIXME : reverse engineer that for Rx00 cards */
1007			/* FIXME : the mask supposedly contains low-res z values. So can't set
1008			   just to the max (0xff? or actually 0x3fff?), need to take z clear
1009			   value into account? */
1010			/* pattern seems to work for r100, though get slight
1011			   rendering errors with glxgears. If hierz is not enabled for r100,
1012			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
1013			   other ones are ignored, and the same clear mask can be used. That's
1014			   very different behaviour than R200 which needs different clear mask
1015			   and different number of tiles to clear if hierz is enabled or not !?!
1016			 */
1017			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
1018		} else {
1019			/* clear mask : chooses the clearing pattern.
1020			   rv250: could be used to clear only parts of macrotiles
1021			   (but that would get really complicated...)?
1022			   bit 0 and 1 (either or both of them ?!?!) are used to
1023			   not clear tile (or maybe one of the bits indicates if the tile is
1024			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
1025			   Pattern is as follows:
1026			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
1027			   bits -------------------------------------------------
1028			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
1029			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
1030			   covers 256 pixels ?!?
1031			 */
1032			clearmask = 0x0;
1033		}
1034
1035		BEGIN_RING(8);
1036		RADEON_WAIT_UNTIL_2D_IDLE();
1037		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
1038			     tempRB3D_DEPTHCLEARVALUE);
1039		/* what offset is this exactly ? */
1040		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
1041		/* need ctlstat, otherwise get some strange black flickering */
1042		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
1043			     RADEON_RB3D_ZC_FLUSH_ALL);
1044		ADVANCE_RING();
1045
1046		for (i = 0; i < nbox; i++) {
1047			int tileoffset, nrtilesx, nrtilesy, j;
1048			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
1049			if ((dev_priv->flags & RADEON_HAS_HIERZ)
1050			    && !(dev_priv->microcode_version == UCODE_R200)) {
1051				/* FIXME : figure this out for r200 (when hierz is enabled). Or
1052				   maybe r200 actually doesn't need to put the low-res z value into
1053				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
1054				   Works for R100, both with hierz and without.
1055				   R100 seems to operate on 2x1 8x8 tiles, but...
1056				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
1057				   problematic with resolutions which are not 64 pix aligned? */
1058				tileoffset =
1059				    ((pbox[i].y1 >> 3) * depthpixperline +
1060				     pbox[i].x1) >> 6;
1061				nrtilesx =
1062				    ((pbox[i].x2 & ~63) -
1063				     (pbox[i].x1 & ~63)) >> 4;
1064				nrtilesy =
1065				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1066				for (j = 0; j <= nrtilesy; j++) {
1067					BEGIN_RING(4);
1068					OUT_RING(CP_PACKET3
1069						 (RADEON_3D_CLEAR_ZMASK, 2));
1070					/* first tile */
1071					OUT_RING(tileoffset * 8);
1072					/* the number of tiles to clear */
1073					OUT_RING(nrtilesx + 4);
1074					/* clear mask : chooses the clearing pattern. */
1075					OUT_RING(clearmask);
1076					ADVANCE_RING();
1077					tileoffset += depthpixperline >> 6;
1078				}
1079			} else if (dev_priv->microcode_version == UCODE_R200) {
1080				/* works for rv250. */
1081				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
1082				tileoffset =
1083				    ((pbox[i].y1 >> 3) * depthpixperline +
1084				     pbox[i].x1) >> 5;
1085				nrtilesx =
1086				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
1087				nrtilesy =
1088				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1089				for (j = 0; j <= nrtilesy; j++) {
1090					BEGIN_RING(4);
1091					OUT_RING(CP_PACKET3
1092						 (RADEON_3D_CLEAR_ZMASK, 2));
1093					/* first tile */
1094					/* judging by the first tile offset needed, could possibly
1095					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
1096					   macro tiles, though would still need clear mask for
1097					   right/bottom if truly 4x4 granularity is desired ? */
1098					OUT_RING(tileoffset * 16);
1099					/* the number of tiles to clear */
1100					OUT_RING(nrtilesx + 1);
1101					/* clear mask : chooses the clearing pattern. */
1102					OUT_RING(clearmask);
1103					ADVANCE_RING();
1104					tileoffset += depthpixperline >> 5;
1105				}
1106			} else {	/* rv 100 */
1107				/* rv100 might not need 64 pix alignment, who knows */
1108				/* offsets are, hmm, weird */
1109				tileoffset =
1110				    ((pbox[i].y1 >> 4) * depthpixperline +
1111				     pbox[i].x1) >> 6;
1112				nrtilesx =
1113				    ((pbox[i].x2 & ~63) -
1114				     (pbox[i].x1 & ~63)) >> 4;
1115				nrtilesy =
1116				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
1117				for (j = 0; j <= nrtilesy; j++) {
1118					BEGIN_RING(4);
1119					OUT_RING(CP_PACKET3
1120						 (RADEON_3D_CLEAR_ZMASK, 2));
1121					OUT_RING(tileoffset * 128);
1122					/* the number of tiles to clear */
1123					OUT_RING(nrtilesx + 4);
1124					/* clear mask : chooses the clearing pattern. */
1125					OUT_RING(clearmask);
1126					ADVANCE_RING();
1127					tileoffset += depthpixperline >> 6;
1128				}
1129			}
1130		}
1131
1132		/* TODO don't always clear all hi-level z tiles */
1133		if ((dev_priv->flags & RADEON_HAS_HIERZ)
1134		    && (dev_priv->microcode_version == UCODE_R200)
1135		    && (flags & RADEON_USE_HIERZ))
1136			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
1137			/* FIXME : the mask supposedly contains low-res z values. So can't set
1138			   just to the max (0xff? or actually 0x3fff?), need to take z clear
1139			   value into account? */
1140		{
1141			BEGIN_RING(4);
1142			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1143			OUT_RING(0x0);	/* First tile */
1144			OUT_RING(0x3cc0);
1145			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1146			ADVANCE_RING();
1147		}
1148	}
1149
1150	/* We have to clear the depth and/or stencil buffers by
1151	 * rendering a quad into just those buffers.  Thus, we have to
1152	 * make sure the 3D engine is configured correctly.
1153	 */
1154	else if ((dev_priv->microcode_version == UCODE_R200) &&
1155		(flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1156
1157		int tempPP_CNTL;
1158		int tempRE_CNTL;
1159		int tempRB3D_CNTL;
1160		int tempRB3D_ZSTENCILCNTL;
1161		int tempRB3D_STENCILREFMASK;
1162		int tempRB3D_PLANEMASK;
1163		int tempSE_CNTL;
1164		int tempSE_VTE_CNTL;
1165		int tempSE_VTX_FMT_0;
1166		int tempSE_VTX_FMT_1;
1167		int tempSE_VAP_CNTL;
1168		int tempRE_AUX_SCISSOR_CNTL;
1169
1170		tempPP_CNTL = 0;
1171		tempRE_CNTL = 0;
1172
1173		tempRB3D_CNTL = depth_clear->rb3d_cntl;
1174
1175		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1176		tempRB3D_STENCILREFMASK = 0x0;
1177
1178		tempSE_CNTL = depth_clear->se_cntl;
1179
1180		/* Disable TCL */
1181
1182		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1183					  (0x9 <<
1184					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1185
1186		tempRB3D_PLANEMASK = 0x0;
1187
1188		tempRE_AUX_SCISSOR_CNTL = 0x0;
1189
1190		tempSE_VTE_CNTL =
1191		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1192
1193		/* Vertex format (X, Y, Z, W) */
1194		tempSE_VTX_FMT_0 =
1195		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1196		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1197		tempSE_VTX_FMT_1 = 0x0;
1198
1199		/*
1200		 * Depth buffer specific enables
1201		 */
1202		if (flags & RADEON_DEPTH) {
1203			/* Enable depth buffer */
1204			tempRB3D_CNTL |= RADEON_Z_ENABLE;
1205		} else {
1206			/* Disable depth buffer */
1207			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1208		}
1209
1210		/*
1211		 * Stencil buffer specific enables
1212		 */
1213		if (flags & RADEON_STENCIL) {
1214			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1215			tempRB3D_STENCILREFMASK = clear->depth_mask;
1216		} else {
1217			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1218			tempRB3D_STENCILREFMASK = 0x00000000;
1219		}
1220
1221		if (flags & RADEON_USE_COMP_ZBUF) {
1222			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1223			    RADEON_Z_DECOMPRESSION_ENABLE;
1224		}
1225		if (flags & RADEON_USE_HIERZ) {
1226			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1227		}
1228
1229		BEGIN_RING(26);
1230		RADEON_WAIT_UNTIL_2D_IDLE();
1231
1232		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1233		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1234		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1235		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1236		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1237			     tempRB3D_STENCILREFMASK);
1238		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1239		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1240		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1241		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1242		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1243		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1244		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1245		ADVANCE_RING();
1246
1247		/* Make sure we restore the 3D state next time.
1248		 */
1249		sarea_priv->ctx_owner = 0;
1250
1251		for (i = 0; i < nbox; i++) {
1252
1253			/* Funny that this should be required --
1254			 *  sets top-left?
1255			 */
1256			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1257
1258			BEGIN_RING(14);
1259			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1260			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1261				  RADEON_PRIM_WALK_RING |
1262				  (3 << RADEON_NUM_VERTICES_SHIFT)));
1263			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1264			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1265			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1266			OUT_RING(0x3f800000);
1267			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1268			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1269			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1270			OUT_RING(0x3f800000);
1271			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1272			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1273			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1274			OUT_RING(0x3f800000);
1275			ADVANCE_RING();
1276		}
1277	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1278
1279		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1280
1281		rb3d_cntl = depth_clear->rb3d_cntl;
1282
1283		if (flags & RADEON_DEPTH) {
1284			rb3d_cntl |= RADEON_Z_ENABLE;
1285		} else {
1286			rb3d_cntl &= ~RADEON_Z_ENABLE;
1287		}
1288
1289		if (flags & RADEON_STENCIL) {
1290			rb3d_cntl |= RADEON_STENCIL_ENABLE;
1291			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
1292		} else {
1293			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1294			rb3d_stencilrefmask = 0x00000000;
1295		}
1296
1297		if (flags & RADEON_USE_COMP_ZBUF) {
1298			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1299			    RADEON_Z_DECOMPRESSION_ENABLE;
1300		}
1301		if (flags & RADEON_USE_HIERZ) {
1302			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1303		}
1304
1305		BEGIN_RING(13);
1306		RADEON_WAIT_UNTIL_2D_IDLE();
1307
1308		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1309		OUT_RING(0x00000000);
1310		OUT_RING(rb3d_cntl);
1311
1312		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1313		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1314		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1315		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1316		ADVANCE_RING();
1317
1318		/* Make sure we restore the 3D state next time.
1319		 */
1320		sarea_priv->ctx_owner = 0;
1321
1322		for (i = 0; i < nbox; i++) {
1323
1324			/* Funny that this should be required --
1325			 *  sets top-left?
1326			 */
1327			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1328
1329			BEGIN_RING(15);
1330
1331			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1332			OUT_RING(RADEON_VTX_Z_PRESENT |
1333				 RADEON_VTX_PKCOLOR_PRESENT);
1334			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1335				  RADEON_PRIM_WALK_RING |
1336				  RADEON_MAOS_ENABLE |
1337				  RADEON_VTX_FMT_RADEON_MODE |
1338				  (3 << RADEON_NUM_VERTICES_SHIFT)));
1339
1340			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1341			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1342			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1343			OUT_RING(0x0);
1344
1345			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1346			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1347			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1348			OUT_RING(0x0);
1349
1350			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1351			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1352			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1353			OUT_RING(0x0);
1354
1355			ADVANCE_RING();
1356		}
1357	}
1358
1359	/* Increment the clear counter.  The client-side 3D driver must
1360	 * wait on this value before performing the clear ioctl.  We
1361	 * need this because the card's so damned fast...
1362	 */
1363	sarea_priv->last_clear++;
1364
1365	BEGIN_RING(4);
1366
1367	RADEON_CLEAR_AGE(sarea_priv->last_clear);
1368	RADEON_WAIT_UNTIL_IDLE();
1369
1370	ADVANCE_RING();
1371}
1372
1373static void radeon_cp_dispatch_swap(struct drm_device *dev, struct drm_master *master)
1374{
1375	drm_radeon_private_t *dev_priv = dev->dev_private;
1376	struct drm_radeon_master_private *master_priv = master->driver_priv;
1377	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1378	int nbox = sarea_priv->nbox;
1379	struct drm_clip_rect *pbox = sarea_priv->boxes;
1380	int i;
1381	RING_LOCALS;
1382	DRM_DEBUG("\n");
1383
1384	/* Do some trivial performance monitoring...
1385	 */
1386	if (dev_priv->do_boxes)
1387		radeon_cp_performance_boxes(dev_priv, master_priv);
1388
1389	/* Wait for the 3D stream to idle before dispatching the bitblt.
1390	 * This will prevent data corruption between the two streams.
1391	 */
1392	BEGIN_RING(2);
1393
1394	RADEON_WAIT_UNTIL_3D_IDLE();
1395
1396	ADVANCE_RING();
1397
1398	for (i = 0; i < nbox; i++) {
1399		int x = pbox[i].x1;
1400		int y = pbox[i].y1;
1401		int w = pbox[i].x2 - x;
1402		int h = pbox[i].y2 - y;
1403
1404		DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1405
1406		BEGIN_RING(9);
1407
1408		OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1409		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1410			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1411			 RADEON_GMC_BRUSH_NONE |
1412			 (dev_priv->color_fmt << 8) |
1413			 RADEON_GMC_SRC_DATATYPE_COLOR |
1414			 RADEON_ROP3_S |
1415			 RADEON_DP_SRC_SOURCE_MEMORY |
1416			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1417
1418		/* Make this work even if front & back are flipped:
1419		 */
1420		OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1421		if (sarea_priv->pfCurrentPage == 0) {
1422			OUT_RING(dev_priv->back_pitch_offset);
1423			OUT_RING(dev_priv->front_pitch_offset);
1424		} else {
1425			OUT_RING(dev_priv->front_pitch_offset);
1426			OUT_RING(dev_priv->back_pitch_offset);
1427		}
1428
1429		OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1430		OUT_RING((x << 16) | y);
1431		OUT_RING((x << 16) | y);
1432		OUT_RING((w << 16) | h);
1433
1434		ADVANCE_RING();
1435	}
1436
1437	/* Increment the frame counter.  The client-side 3D driver must
1438	 * throttle the framerate by waiting for this value before
1439	 * performing the swapbuffer ioctl.
1440	 */
1441	sarea_priv->last_frame++;
1442
1443	BEGIN_RING(4);
1444
1445	RADEON_FRAME_AGE(sarea_priv->last_frame);
1446	RADEON_WAIT_UNTIL_2D_IDLE();
1447
1448	ADVANCE_RING();
1449}
1450
1451void radeon_cp_dispatch_flip(struct drm_device *dev, struct drm_master *master)
1452{
1453	drm_radeon_private_t *dev_priv = dev->dev_private;
1454	struct drm_radeon_master_private *master_priv = master->driver_priv;
1455	struct drm_sarea *sarea = (struct drm_sarea *)master_priv->sarea->handle;
1456	int offset = (master_priv->sarea_priv->pfCurrentPage == 1)
1457	    ? dev_priv->front_offset : dev_priv->back_offset;
1458	RING_LOCALS;
1459	DRM_DEBUG("pfCurrentPage=%d\n",
1460		  master_priv->sarea_priv->pfCurrentPage);
1461
1462	/* Do some trivial performance monitoring...
1463	 */
1464	if (dev_priv->do_boxes) {
1465		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1466		radeon_cp_performance_boxes(dev_priv, master_priv);
1467	}
1468
1469	/* Update the frame offsets for both CRTCs
1470	 */
1471	BEGIN_RING(6);
1472
1473	RADEON_WAIT_UNTIL_3D_IDLE();
1474	OUT_RING_REG(RADEON_CRTC_OFFSET,
1475		     ((sarea->frame.y * dev_priv->front_pitch +
1476		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1477		     + offset);
1478	OUT_RING_REG(RADEON_CRTC2_OFFSET, master_priv->sarea_priv->crtc2_base
1479		     + offset);
1480
1481	ADVANCE_RING();
1482
1483	/* Increment the frame counter.  The client-side 3D driver must
1484	 * throttle the framerate by waiting for this value before
1485	 * performing the swapbuffer ioctl.
1486	 */
1487	master_priv->sarea_priv->last_frame++;
1488	master_priv->sarea_priv->pfCurrentPage =
1489		1 - master_priv->sarea_priv->pfCurrentPage;
1490
1491	BEGIN_RING(2);
1492
1493	RADEON_FRAME_AGE(master_priv->sarea_priv->last_frame);
1494
1495	ADVANCE_RING();
1496}
1497
1498static int bad_prim_vertex_nr(int primitive, int nr)
1499{
1500	switch (primitive & RADEON_PRIM_TYPE_MASK) {
1501	case RADEON_PRIM_TYPE_NONE:
1502	case RADEON_PRIM_TYPE_POINT:
1503		return nr < 1;
1504	case RADEON_PRIM_TYPE_LINE:
1505		return (nr & 1) || nr == 0;
1506	case RADEON_PRIM_TYPE_LINE_STRIP:
1507		return nr < 2;
1508	case RADEON_PRIM_TYPE_TRI_LIST:
1509	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1510	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1511	case RADEON_PRIM_TYPE_RECT_LIST:
1512		return nr % 3 || nr == 0;
1513	case RADEON_PRIM_TYPE_TRI_FAN:
1514	case RADEON_PRIM_TYPE_TRI_STRIP:
1515		return nr < 3;
1516	default:
1517		return 1;
1518	}
1519}
1520
/*
 * Description of one primitive stored in a DMA buffer, as consumed by
 * radeon_cp_dispatch_vertex() and radeon_cp_dispatch_indices().
 */
1521typedef struct {
1522	unsigned int start;	/* byte offset of the primitive's data within the buffer */
1523	unsigned int finish;	/* byte offset just past the end of that data */
1524	unsigned int prim;	/* hardware primitive type bits, OR'ed into the draw command */
1525	unsigned int numverts;	/* number of vertices */
1526	unsigned int offset;	/* vertex data offset added to the GART buffer base (indexed path) */
1527	unsigned int vc_format;	/* vertex format word, emitted unchanged in the draw packet */
1528} drm_radeon_tcl_prim_t;
1529
1530static void radeon_cp_dispatch_vertex(struct drm_device * dev,
1531				      struct drm_file *file_priv,
1532				      struct drm_buf * buf,
1533				      drm_radeon_tcl_prim_t * prim)
1534{
1535	drm_radeon_private_t *dev_priv = dev->dev_private;
1536	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
1537	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1538	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1539	int numverts = (int)prim->numverts;
1540	int nbox = sarea_priv->nbox;
1541	int i = 0;
1542	RING_LOCALS;
1543
1544	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1545		  prim->prim,
1546		  prim->vc_format, prim->start, prim->finish, prim->numverts);
1547
1548	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1549		DRM_ERROR("bad prim %x numverts %d\n",
1550			  prim->prim, prim->numverts);
1551		return;
1552	}
1553
1554	do {
1555		/* Emit the next cliprect */
1556		if (i < nbox) {
1557			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1558		}
1559
1560		/* Emit the vertex buffer rendering commands */
1561		BEGIN_RING(5);
1562
1563		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1564		OUT_RING(offset);
1565		OUT_RING(numverts);
1566		OUT_RING(prim->vc_format);
1567		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1568			 RADEON_COLOR_ORDER_RGBA |
1569			 RADEON_VTX_FMT_RADEON_MODE |
1570			 (numverts << RADEON_NUM_VERTICES_SHIFT));
1571
1572		ADVANCE_RING();
1573
1574		i++;
1575	} while (i < nbox);
1576}
1577
1578void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
1579{
1580	drm_radeon_private_t *dev_priv = dev->dev_private;
1581	struct drm_radeon_master_private *master_priv = master->driver_priv;
1582	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1583	RING_LOCALS;
1584
1585	buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
1586
1587	/* Emit the vertex buffer age */
1588	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
1589		BEGIN_RING(3);
1590		R600_DISPATCH_AGE(buf_priv->age);
1591		ADVANCE_RING();
1592	} else {
1593		BEGIN_RING(2);
1594		RADEON_DISPATCH_AGE(buf_priv->age);
1595		ADVANCE_RING();
1596	}
1597
1598	buf->pending = 1;
1599	buf->used = 0;
1600}
1601
1602static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1603					struct drm_buf * buf, int start, int end)
1604{
1605	drm_radeon_private_t *dev_priv = dev->dev_private;
1606	RING_LOCALS;
1607	DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1608
1609	if (start != end) {
1610		int offset = (dev_priv->gart_buffers_offset
1611			      + buf->offset + start);
1612		int dwords = (end - start + 3) / sizeof(u32);
1613
1614		/* Indirect buffer data must be an even number of
1615		 * dwords, so if we've been given an odd number we must
1616		 * pad the data with a Type-2 CP packet.
1617		 */
1618		if (dwords & 1) {
1619			u32 *data = (u32 *)
1620			    ((char *)dev->agp_buffer_map->handle
1621			     + buf->offset + start);
1622			data[dwords++] = RADEON_CP_PACKET2;
1623		}
1624
1625		/* Fire off the indirect buffer */
1626		BEGIN_RING(3);
1627
1628		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1629		OUT_RING(offset);
1630		OUT_RING(dwords);
1631
1632		ADVANCE_RING();
1633	}
1634}
1635
1636static void radeon_cp_dispatch_indices(struct drm_device *dev,
1637				       struct drm_master *master,
1638				       struct drm_buf * elt_buf,
1639				       drm_radeon_tcl_prim_t * prim)
1640{
1641	drm_radeon_private_t *dev_priv = dev->dev_private;
1642	struct drm_radeon_master_private *master_priv = master->driver_priv;
1643	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1644	int offset = dev_priv->gart_buffers_offset + prim->offset;
1645	u32 *data;
1646	int dwords;
1647	int i = 0;
1648	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1649	int count = (prim->finish - start) / sizeof(u16);
1650	int nbox = sarea_priv->nbox;
1651
1652	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1653		  prim->prim,
1654		  prim->vc_format,
1655		  prim->start, prim->finish, prim->offset, prim->numverts);
1656
1657	if (bad_prim_vertex_nr(prim->prim, count)) {
1658		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1659		return;
1660	}
1661
1662	if (start >= prim->finish || (prim->start & 0x7)) {
1663		DRM_ERROR("buffer prim %d\n", prim->prim);
1664		return;
1665	}
1666
1667	dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1668
1669	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1670			elt_buf->offset + prim->start);
1671
1672	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1673	data[1] = offset;
1674	data[2] = prim->numverts;
1675	data[3] = prim->vc_format;
1676	data[4] = (prim->prim |
1677		   RADEON_PRIM_WALK_IND |
1678		   RADEON_COLOR_ORDER_RGBA |
1679		   RADEON_VTX_FMT_RADEON_MODE |
1680		   (count << RADEON_NUM_VERTICES_SHIFT));
1681
1682	do {
1683		if (i < nbox)
1684			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1685
1686		radeon_cp_dispatch_indirect(dev, elt_buf,
1687					    prim->start, prim->finish);
1688
1689		i++;
1690	} while (i < nbox);
1691
1692}
1693
1694#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1695
1696static int radeon_cp_dispatch_texture(struct drm_device * dev,
1697				      struct drm_file *file_priv,
1698				      drm_radeon_texture_t * tex,
1699				      drm_radeon_tex_image_t * image)
1700{
1701	drm_radeon_private_t *dev_priv = dev->dev_private;
1702	struct drm_buf *buf;
1703	u32 format;
1704	u32 *buffer;
1705	const u8 __user *data;
1706	int size, dwords, tex_width, blit_width, spitch;
1707	u32 height;
1708	int i;
1709	u32 texpitch, microtile;
1710	u32 offset, byte_offset;
1711	RING_LOCALS;
1712
1713	if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
1714		DRM_ERROR("Invalid destination offset\n");
1715		return -EINVAL;
1716	}
1717
1718	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1719
1720	/* Flush the pixel cache.  This ensures no pixel data gets mixed
1721	 * up with the texture data from the host data blit, otherwise
1722	 * part of the texture image may be corrupted.
1723	 */
1724	BEGIN_RING(4);
1725	RADEON_FLUSH_CACHE();
1726	RADEON_WAIT_UNTIL_IDLE();
1727	ADVANCE_RING();
1728
1729	/* The compiler won't optimize away a division by a variable,
1730	 * even if the only legal values are powers of two.  Thus, we'll
1731	 * use a shift instead.
1732	 */
1733	switch (tex->format) {
1734	case RADEON_TXFORMAT_ARGB8888:
1735	case RADEON_TXFORMAT_RGBA8888:
1736		format = RADEON_COLOR_FORMAT_ARGB8888;
1737		tex_width = tex->width * 4;
1738		blit_width = image->width * 4;
1739		break;
1740	case RADEON_TXFORMAT_AI88:
1741	case RADEON_TXFORMAT_ARGB1555:
1742	case RADEON_TXFORMAT_RGB565:
1743	case RADEON_TXFORMAT_ARGB4444:
1744	case RADEON_TXFORMAT_VYUY422:
1745	case RADEON_TXFORMAT_YVYU422:
1746		format = RADEON_COLOR_FORMAT_RGB565;
1747		tex_width = tex->width * 2;
1748		blit_width = image->width * 2;
1749		break;
1750	case RADEON_TXFORMAT_I8:
1751	case RADEON_TXFORMAT_RGB332:
1752		format = RADEON_COLOR_FORMAT_CI8;
1753		tex_width = tex->width * 1;
1754		blit_width = image->width * 1;
1755		break;
1756	default:
1757		DRM_ERROR("invalid texture format %d\n", tex->format);
1758		return -EINVAL;
1759	}
1760	spitch = blit_width >> 6;
1761	if (spitch == 0 && image->height > 1)
1762		return -EINVAL;
1763
1764	texpitch = tex->pitch;
1765	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1766		microtile = 1;
1767		if (tex_width < 64) {
1768			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1769			/* we got tiled coordinates, untile them */
1770			image->x *= 2;
1771		}
1772	} else
1773		microtile = 0;
1774
1775	/* this might fail for zero-sized uploads - are those illegal? */
1776	if (!radeon_check_offset(dev_priv, tex->offset + image->height *
1777				blit_width - 1)) {
1778		DRM_ERROR("Invalid final destination offset\n");
1779		return -EINVAL;
1780	}
1781
1782	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1783
1784	do {
1785		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1786			  tex->offset >> 10, tex->pitch, tex->format,
1787			  image->x, image->y, image->width, image->height);
1788
1789		/* Make a copy of some parameters in case we have to
1790		 * update them for a multi-pass texture blit.
1791		 */
1792		height = image->height;
1793		data = (const u8 __user *)image->data;
1794
1795		size = height * blit_width;
1796
1797		if (size > RADEON_MAX_TEXTURE_SIZE) {
1798			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1799			size = height * blit_width;
1800		} else if (size < 4 && size > 0) {
1801			size = 4;
1802		} else if (size == 0) {
1803			return 0;
1804		}
1805
1806		buf = radeon_freelist_get(dev);
1807		if (0 && !buf) {
1808			radeon_do_cp_idle(dev_priv);
1809			buf = radeon_freelist_get(dev);
1810		}
1811		if (!buf) {
1812			DRM_DEBUG("EAGAIN\n");
1813			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1814				return -EFAULT;
1815			return -EAGAIN;
1816		}
1817
1818		/* Dispatch the indirect buffer.
1819		 */
1820		buffer =
1821		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1822		dwords = size / 4;
1823
1824#define RADEON_COPY_MT(_buf, _data, _width) \
1825	do { \
1826		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1827			DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1828			return -EFAULT; \
1829		} \
1830	} while(0)
1831
1832		if (microtile) {
1833			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
1834			   however, we cannot use blitter directly for texture width < 64 bytes,
1835			   since minimum tex pitch is 64 bytes and we need this to match
1836			   the texture width, otherwise the blitter will tile it wrong.
1837			   Thus, tiling manually in this case. Additionally, need to special
1838			   case tex height = 1, since our actual image will have height 2
1839			   and we need to ensure we don't read beyond the texture size
1840			   from user space. */
1841			if (tex->height == 1) {
1842				if (tex_width >= 64 || tex_width <= 16) {
1843					RADEON_COPY_MT(buffer, data,
1844						(int)(tex_width * sizeof(u32)));
1845				} else if (tex_width == 32) {
1846					RADEON_COPY_MT(buffer, data, 16);
1847					RADEON_COPY_MT(buffer + 8,
1848						       data + 16, 16);
1849				}
1850			} else if (tex_width >= 64 || tex_width == 16) {
1851				RADEON_COPY_MT(buffer, data,
1852					       (int)(dwords * sizeof(u32)));
1853			} else if (tex_width < 16) {
1854				for (i = 0; i < tex->height; i++) {
1855					RADEON_COPY_MT(buffer, data, tex_width);
1856					buffer += 4;
1857					data += tex_width;
1858				}
1859			} else if (tex_width == 32) {
1860				/* TODO: make sure this works when not fitting in one buffer
1861				   (i.e. 32bytes x 2048...) */
1862				for (i = 0; i < tex->height; i += 2) {
1863					RADEON_COPY_MT(buffer, data, 16);
1864					data += 16;
1865					RADEON_COPY_MT(buffer + 8, data, 16);
1866					data += 16;
1867					RADEON_COPY_MT(buffer + 4, data, 16);
1868					data += 16;
1869					RADEON_COPY_MT(buffer + 12, data, 16);
1870					data += 16;
1871					buffer += 16;
1872				}
1873			}
1874		} else {
1875			if (tex_width >= 32) {
1876				/* Texture image width is larger than the minimum, so we
1877				 * can upload it directly.
1878				 */
1879				RADEON_COPY_MT(buffer, data,
1880					       (int)(dwords * sizeof(u32)));
1881			} else {
1882				/* Texture image width is less than the minimum, so we
1883				 * need to pad out each image scanline to the minimum
1884				 * width.
1885				 */
1886				for (i = 0; i < tex->height; i++) {
1887					RADEON_COPY_MT(buffer, data, tex_width);
1888					buffer += 8;
1889					data += tex_width;
1890				}
1891			}
1892		}
1893
1894#undef RADEON_COPY_MT
1895		byte_offset = (image->y & ~2047) * blit_width;
1896		buf->file_priv = file_priv;
1897		buf->used = size;
1898		offset = dev_priv->gart_buffers_offset + buf->offset;
1899		BEGIN_RING(9);
1900		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1901		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1902			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1903			 RADEON_GMC_BRUSH_NONE |
1904			 (format << 8) |
1905			 RADEON_GMC_SRC_DATATYPE_COLOR |
1906			 RADEON_ROP3_S |
1907			 RADEON_DP_SRC_SOURCE_MEMORY |
1908			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1909		OUT_RING((spitch << 22) | (offset >> 10));
1910		OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
1911		OUT_RING(0);
1912		OUT_RING((image->x << 16) | (image->y % 2048));
1913		OUT_RING((image->width << 16) | height);
1914		RADEON_WAIT_UNTIL_2D_IDLE();
1915		ADVANCE_RING();
1916		COMMIT_RING();
1917
1918		radeon_cp_discard_buffer(dev, file_priv->master, buf);
1919
1920		/* Update the input parameters for next time */
1921		image->y += height;
1922		image->height -= height;
1923		image->data = (const u8 __user *)image->data + size;
1924	} while (image->height > 0);
1925
1926	/* Flush the pixel cache after the blit completes.  This ensures
1927	 * the texture data is written out to memory before rendering
1928	 * continues.
1929	 */
1930	BEGIN_RING(4);
1931	RADEON_FLUSH_CACHE();
1932	RADEON_WAIT_UNTIL_2D_IDLE();
1933	ADVANCE_RING();
1934	COMMIT_RING();
1935
1936	return 0;
1937}
1938
1939static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
1940{
1941	drm_radeon_private_t *dev_priv = dev->dev_private;
1942	int i;
1943	RING_LOCALS;
1944	DRM_DEBUG("\n");
1945
1946	BEGIN_RING(35);
1947
1948	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1949	OUT_RING(0x00000000);
1950
1951	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1952	for (i = 0; i < 32; i++) {
1953		OUT_RING(stipple[i]);
1954	}
1955
1956	ADVANCE_RING();
1957}
1958
1959static void radeon_apply_surface_regs(int surf_index,
1960				      drm_radeon_private_t *dev_priv)
1961{
1962	if (!dev_priv->mmio)
1963		return;
1964
1965	radeon_do_cp_idle(dev_priv);
1966
1967	RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1968		     dev_priv->surfaces[surf_index].flags);
1969	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1970		     dev_priv->surfaces[surf_index].lower);
1971	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1972		     dev_priv->surfaces[surf_index].upper);
1973}
1974
1975/* Allocates a virtual surface
1976 * doesn't always allocate a real surface, will stretch an existing
1977 * surface when possible.
1978 *
1979 * Note that refcount can be at most 2, since during a free refcount=3
1980 * might mean we have to allocate a new surface which might not always
1981 * be available.
1982 * For example : we allocate three contiguous surfaces ABC. If B is
1983 * freed, we suddenly need two surfaces to store A and C, which might
1984 * not always be available.
1985 */
1986static int alloc_surface(drm_radeon_surface_alloc_t *new,
1987			 drm_radeon_private_t *dev_priv,
1988			 struct drm_file *file_priv)
1989{
1990	struct radeon_virt_surface *s;
1991	int i;
1992	int virt_surface_index;
1993	uint32_t new_upper, new_lower;
1994
1995	new_lower = new->address;
1996	new_upper = new_lower + new->size - 1;
1997
1998	/* sanity check */
1999	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
2000	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
2001	     RADEON_SURF_ADDRESS_FIXED_MASK)
2002	    || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
2003		return -1;
2004
2005	/* make sure there is no overlap with existing surfaces */
2006	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2007		if ((dev_priv->surfaces[i].refcount != 0) &&
2008		    (((new_lower >= dev_priv->surfaces[i].lower) &&
2009		      (new_lower < dev_priv->surfaces[i].upper)) ||
2010		     ((new_lower < dev_priv->surfaces[i].lower) &&
2011		      (new_upper > dev_priv->surfaces[i].lower)))) {
2012			return -1;
2013		}
2014	}
2015
2016	/* find a virtual surface */
2017	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
2018		if (dev_priv->virt_surfaces[i].file_priv == NULL)
2019			break;
2020	if (i == 2 * RADEON_MAX_SURFACES) {
2021		return -1;
2022	}
2023	virt_surface_index = i;
2024
2025	/* try to reuse an existing surface */
2026	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2027		/* extend before */
2028		if ((dev_priv->surfaces[i].refcount == 1) &&
2029		    (new->flags == dev_priv->surfaces[i].flags) &&
2030		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
2031			s = &(dev_priv->virt_surfaces[virt_surface_index]);
2032			s->surface_index = i;
2033			s->lower = new_lower;
2034			s->upper = new_upper;
2035			s->flags = new->flags;
2036			s->file_priv = file_priv;
2037			dev_priv->surfaces[i].refcount++;
2038			dev_priv->surfaces[i].lower = s->lower;
2039			radeon_apply_surface_regs(s->surface_index, dev_priv);
2040			return virt_surface_index;
2041		}
2042
2043		/* extend after */
2044		if ((dev_priv->surfaces[i].refcount == 1) &&
2045		    (new->flags == dev_priv->surfaces[i].flags) &&
2046		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
2047			s = &(dev_priv->virt_surfaces[virt_surface_index]);
2048			s->surface_index = i;
2049			s->lower = new_lower;
2050			s->upper = new_upper;
2051			s->flags = new->flags;
2052			s->file_priv = file_priv;
2053			dev_priv->surfaces[i].refcount++;
2054			dev_priv->surfaces[i].upper = s->upper;
2055			radeon_apply_surface_regs(s->surface_index, dev_priv);
2056			return virt_surface_index;
2057		}
2058	}
2059
2060	/* okay, we need a new one */
2061	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2062		if (dev_priv->surfaces[i].refcount == 0) {
2063			s = &(dev_priv->virt_surfaces[virt_surface_index]);
2064			s->surface_index = i;
2065			s->lower = new_lower;
2066			s->upper = new_upper;
2067			s->flags = new->flags;
2068			s->file_priv = file_priv;
2069			dev_priv->surfaces[i].refcount = 1;
2070			dev_priv->surfaces[i].lower = s->lower;
2071			dev_priv->surfaces[i].upper = s->upper;
2072			dev_priv->surfaces[i].flags = s->flags;
2073			radeon_apply_surface_regs(s->surface_index, dev_priv);
2074			return virt_surface_index;
2075		}
2076	}
2077
2078	/* we didn't find anything */
2079	return -1;
2080}
2081
2082static int free_surface(struct drm_file *file_priv,
2083			drm_radeon_private_t * dev_priv,
2084			int lower)
2085{
2086	struct radeon_virt_surface *s;
2087	int i;
2088	/* find the virtual surface */
2089	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2090		s = &(dev_priv->virt_surfaces[i]);
2091		if (s->file_priv) {
2092			if ((lower == s->lower) && (file_priv == s->file_priv))
2093			{
2094				if (dev_priv->surfaces[s->surface_index].
2095				    lower == s->lower)
2096					dev_priv->surfaces[s->surface_index].
2097					    lower = s->upper;
2098
2099				if (dev_priv->surfaces[s->surface_index].
2100				    upper == s->upper)
2101					dev_priv->surfaces[s->surface_index].
2102					    upper = s->lower;
2103
2104				dev_priv->surfaces[s->surface_index].refcount--;
2105				if (dev_priv->surfaces[s->surface_index].
2106				    refcount == 0)
2107					dev_priv->surfaces[s->surface_index].
2108					    flags = 0;
2109				s->file_priv = NULL;
2110				radeon_apply_surface_regs(s->surface_index,
2111							  dev_priv);
2112				return 0;
2113			}
2114		}
2115	}
2116	return 1;
2117}
2118
2119static void radeon_surfaces_release(struct drm_file *file_priv,
2120				    drm_radeon_private_t * dev_priv)
2121{
2122	int i;
2123	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2124		if (dev_priv->virt_surfaces[i].file_priv == file_priv)
2125			free_surface(file_priv, dev_priv,
2126				     dev_priv->virt_surfaces[i].lower);
2127	}
2128}
2129
2130/* ================================================================
2131 * IOCTL functions
2132 */
2133static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2134{
2135	drm_radeon_private_t *dev_priv = dev->dev_private;
2136	drm_radeon_surface_alloc_t *alloc = data;
2137
2138	if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2139		return -EINVAL;
2140	else
2141		return 0;
2142}
2143
2144static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2145{
2146	drm_radeon_private_t *dev_priv = dev->dev_private;
2147	drm_radeon_surface_free_t *memfree = data;
2148
2149	if (free_surface(file_priv, dev_priv, memfree->address))
2150		return -EINVAL;
2151	else
2152		return 0;
2153}
2154
2155static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2156{
2157	drm_radeon_private_t *dev_priv = dev->dev_private;
2158	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2159	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2160	drm_radeon_clear_t *clear = data;
2161	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2162	DRM_DEBUG("\n");
2163
2164	LOCK_TEST_WITH_RETURN(dev, file_priv);
2165
2166	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2167
2168	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2169		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2170
2171	if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2172			       sarea_priv->nbox * sizeof(depth_boxes[0])))
2173		return -EFAULT;
2174
2175	radeon_cp_dispatch_clear(dev, file_priv->master, clear, depth_boxes);
2176
2177	COMMIT_RING();
2178	return 0;
2179}
2180
2181/* Not sure why this isn't set all the time:
2182 */
2183static int radeon_do_init_pageflip(struct drm_device *dev, struct drm_master *master)
2184{
2185	drm_radeon_private_t *dev_priv = dev->dev_private;
2186	struct drm_radeon_master_private *master_priv = master->driver_priv;
2187	RING_LOCALS;
2188
2189	DRM_DEBUG("\n");
2190
2191	BEGIN_RING(6);
2192	RADEON_WAIT_UNTIL_3D_IDLE();
2193	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2194	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2195		 RADEON_CRTC_OFFSET_FLIP_CNTL);
2196	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2197	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2198		 RADEON_CRTC_OFFSET_FLIP_CNTL);
2199	ADVANCE_RING();
2200
2201	dev_priv->page_flipping = 1;
2202
2203	if (master_priv->sarea_priv->pfCurrentPage != 1)
2204		master_priv->sarea_priv->pfCurrentPage = 0;
2205
2206	return 0;
2207}
2208
2209/* Swapping and flipping are different operations, need different ioctls.
2210 * They can & should be intermixed to support multiple 3d windows.
2211 */
2212static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
2213{
2214	drm_radeon_private_t *dev_priv = dev->dev_private;
2215	DRM_DEBUG("\n");
2216
2217	LOCK_TEST_WITH_RETURN(dev, file_priv);
2218
2219	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2220
2221	if (!dev_priv->page_flipping)
2222		radeon_do_init_pageflip(dev, file_priv->master);
2223
2224	radeon_cp_dispatch_flip(dev, file_priv->master);
2225
2226	COMMIT_RING();
2227	return 0;
2228}
2229
2230static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2231{
2232	drm_radeon_private_t *dev_priv = dev->dev_private;
2233	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2234	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2235
2236	DRM_DEBUG("\n");
2237
2238	LOCK_TEST_WITH_RETURN(dev, file_priv);
2239
2240	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2241
2242	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2243		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2244
2245	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2246		r600_cp_dispatch_swap(dev, file_priv);
2247	else
2248		radeon_cp_dispatch_swap(dev, file_priv->master);
2249	sarea_priv->ctx_owner = 0;
2250
2251	COMMIT_RING();
2252	return 0;
2253}
2254
2255static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
2256{
2257	drm_radeon_private_t *dev_priv = dev->dev_private;
2258	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2259	drm_radeon_sarea_t *sarea_priv;
2260	struct drm_device_dma *dma = dev->dma;
2261	struct drm_buf *buf;
2262	drm_radeon_vertex_t *vertex = data;
2263	drm_radeon_tcl_prim_t prim;
2264
2265	LOCK_TEST_WITH_RETURN(dev, file_priv);
2266
2267	sarea_priv = master_priv->sarea_priv;
2268
2269	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2270		  DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
2271
2272	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2273		DRM_ERROR("buffer index %d (of %d max)\n",
2274			  vertex->idx, dma->buf_count - 1);
2275		return -EINVAL;
2276	}
2277	if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2278		DRM_ERROR("buffer prim %d\n", vertex->prim);
2279		return -EINVAL;
2280	}
2281
2282	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2283	VB_AGE_TEST_WITH_RETURN(dev_priv);
2284
2285	buf = dma->buflist[vertex->idx];
2286
2287	if (buf->file_priv != file_priv) {
2288		DRM_ERROR("process %d using buffer owned by %p\n",
2289			  DRM_CURRENTPID, buf->file_priv);
2290		return -EINVAL;
2291	}
2292	if (buf->pending) {
2293		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2294		return -EINVAL;
2295	}
2296
2297	/* Build up a prim_t record:
2298	 */
2299	if (vertex->count) {
2300		buf->used = vertex->count;	/* not used? */
2301
2302		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2303			if (radeon_emit_state(dev_priv, file_priv,
2304					      &sarea_priv->context_state,
2305					      sarea_priv->tex_state,
2306					      sarea_priv->dirty)) {
2307				DRM_ERROR("radeon_emit_state failed\n");
2308				return -EINVAL;
2309			}
2310
2311			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2312					       RADEON_UPLOAD_TEX1IMAGES |
2313					       RADEON_UPLOAD_TEX2IMAGES |
2314					       RADEON_REQUIRE_QUIESCENCE);
2315		}
2316
2317		prim.start = 0;
2318		prim.finish = vertex->count;	/* unused */
2319		prim.prim = vertex->prim;
2320		prim.numverts = vertex->count;
2321		prim.vc_format = sarea_priv->vc_format;
2322
2323		radeon_cp_dispatch_vertex(dev, file_priv, buf, &prim);
2324	}
2325
2326	if (vertex->discard) {
2327		radeon_cp_discard_buffer(dev, file_priv->master, buf);
2328	}
2329
2330	COMMIT_RING();
2331	return 0;
2332}
2333
2334static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
2335{
2336	drm_radeon_private_t *dev_priv = dev->dev_private;
2337	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2338	drm_radeon_sarea_t *sarea_priv;
2339	struct drm_device_dma *dma = dev->dma;
2340	struct drm_buf *buf;
2341	drm_radeon_indices_t *elts = data;
2342	drm_radeon_tcl_prim_t prim;
2343	int count;
2344
2345	LOCK_TEST_WITH_RETURN(dev, file_priv);
2346
2347	sarea_priv = master_priv->sarea_priv;
2348
2349	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2350		  DRM_CURRENTPID, elts->idx, elts->start, elts->end,
2351		  elts->discard);
2352
2353	if (elts->idx < 0 || elts->idx >= dma->buf_count) {
2354		DRM_ERROR("buffer index %d (of %d max)\n",
2355			  elts->idx, dma->buf_count - 1);
2356		return -EINVAL;
2357	}
2358	if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2359		DRM_ERROR("buffer prim %d\n", elts->prim);
2360		return -EINVAL;
2361	}
2362
2363	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2364	VB_AGE_TEST_WITH_RETURN(dev_priv);
2365
2366	buf = dma->buflist[elts->idx];
2367
2368	if (buf->file_priv != file_priv) {
2369		DRM_ERROR("process %d using buffer owned by %p\n",
2370			  DRM_CURRENTPID, buf->file_priv);
2371		return -EINVAL;
2372	}
2373	if (buf->pending) {
2374		DRM_ERROR("sending pending buffer %d\n", elts->idx);
2375		return -EINVAL;
2376	}
2377
2378	count = (elts->end - elts->start) / sizeof(u16);
2379	elts->start -= RADEON_INDEX_PRIM_OFFSET;
2380
2381	if (elts->start & 0x7) {
2382		DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
2383		return -EINVAL;
2384	}
2385	if (elts->start < buf->used) {
2386		DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
2387		return -EINVAL;
2388	}
2389
2390	buf->used = elts->end;
2391
2392	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2393		if (radeon_emit_state(dev_priv, file_priv,
2394				      &sarea_priv->context_state,
2395				      sarea_priv->tex_state,
2396				      sarea_priv->dirty)) {
2397			DRM_ERROR("radeon_emit_state failed\n");
2398			return -EINVAL;
2399		}
2400
2401		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2402				       RADEON_UPLOAD_TEX1IMAGES |
2403				       RADEON_UPLOAD_TEX2IMAGES |
2404				       RADEON_REQUIRE_QUIESCENCE);
2405	}
2406
2407	/* Build up a prim_t record:
2408	 */
2409	prim.start = elts->start;
2410	prim.finish = elts->end;
2411	prim.prim = elts->prim;
2412	prim.offset = 0;	/* offset from start of dma buffers */
2413	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
2414	prim.vc_format = sarea_priv->vc_format;
2415
2416	radeon_cp_dispatch_indices(dev, file_priv->master, buf, &prim);
2417	if (elts->discard) {
2418		radeon_cp_discard_buffer(dev, file_priv->master, buf);
2419	}
2420
2421	COMMIT_RING();
2422	return 0;
2423}
2424
2425static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2426{
2427	drm_radeon_private_t *dev_priv = dev->dev_private;
2428	drm_radeon_texture_t *tex = data;
2429	drm_radeon_tex_image_t image;
2430	int ret;
2431
2432	LOCK_TEST_WITH_RETURN(dev, file_priv);
2433
2434	if (tex->image == NULL) {
2435		DRM_ERROR("null texture image!\n");
2436		return -EINVAL;
2437	}
2438
2439	if (DRM_COPY_FROM_USER(&image,
2440			       (drm_radeon_tex_image_t __user *) tex->image,
2441			       sizeof(image)))
2442		return -EFAULT;
2443
2444	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2445	VB_AGE_TEST_WITH_RETURN(dev_priv);
2446
2447	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2448		ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image);
2449	else
2450		ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2451
2452	return ret;
2453}
2454
2455static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2456{
2457	drm_radeon_private_t *dev_priv = dev->dev_private;
2458	drm_radeon_stipple_t *stipple = data;
2459	u32 mask[32];
2460
2461	LOCK_TEST_WITH_RETURN(dev, file_priv);
2462
2463	if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2464		return -EFAULT;
2465
2466	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2467
2468	radeon_cp_dispatch_stipple(dev, mask);
2469
2470	COMMIT_RING();
2471	return 0;
2472}
2473
2474static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
2475{
2476	drm_radeon_private_t *dev_priv = dev->dev_private;
2477	struct drm_device_dma *dma = dev->dma;
2478	struct drm_buf *buf;
2479	drm_radeon_indirect_t *indirect = data;
2480	RING_LOCALS;
2481
2482	LOCK_TEST_WITH_RETURN(dev, file_priv);
2483
2484	DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
2485		  indirect->idx, indirect->start, indirect->end,
2486		  indirect->discard);
2487
2488	if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
2489		DRM_ERROR("buffer index %d (of %d max)\n",
2490			  indirect->idx, dma->buf_count - 1);
2491		return -EINVAL;
2492	}
2493
2494	buf = dma->buflist[indirect->idx];
2495
2496	if (buf->file_priv != file_priv) {
2497		DRM_ERROR("process %d using buffer owned by %p\n",
2498			  DRM_CURRENTPID, buf->file_priv);
2499		return -EINVAL;
2500	}
2501	if (buf->pending) {
2502		DRM_ERROR("sending pending buffer %d\n", indirect->idx);
2503		return -EINVAL;
2504	}
2505
2506	if (indirect->start < buf->used) {
2507		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2508			  indirect->start, buf->used);
2509		return -EINVAL;
2510	}
2511
2512	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2513	VB_AGE_TEST_WITH_RETURN(dev_priv);
2514
2515	buf->used = indirect->end;
2516
2517	/* Dispatch the indirect buffer full of commands from the
2518	 * X server.  This is insecure and is thus only available to
2519	 * privileged clients.
2520	 */
2521	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2522		r600_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2523	else {
2524		/* Wait for the 3D stream to idle before the indirect buffer
2525		 * containing 2D acceleration commands is processed.
2526		 */
2527		BEGIN_RING(2);
2528		RADEON_WAIT_UNTIL_3D_IDLE();
2529		ADVANCE_RING();
2530		radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2531	}
2532
2533	if (indirect->discard) {
2534		radeon_cp_discard_buffer(dev, file_priv->master, buf);
2535	}
2536
2537	COMMIT_RING();
2538	return 0;
2539}
2540
2541static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
2542{
2543	drm_radeon_private_t *dev_priv = dev->dev_private;
2544	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2545	drm_radeon_sarea_t *sarea_priv;
2546	struct drm_device_dma *dma = dev->dma;
2547	struct drm_buf *buf;
2548	drm_radeon_vertex2_t *vertex = data;
2549	int i;
2550	unsigned char laststate;
2551
2552	LOCK_TEST_WITH_RETURN(dev, file_priv);
2553
2554	sarea_priv = master_priv->sarea_priv;
2555
2556	DRM_DEBUG("pid=%d index=%d discard=%d\n",
2557		  DRM_CURRENTPID, vertex->idx, vertex->discard);
2558
2559	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2560		DRM_ERROR("buffer index %d (of %d max)\n",
2561			  vertex->idx, dma->buf_count - 1);
2562		return -EINVAL;
2563	}
2564
2565	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2566	VB_AGE_TEST_WITH_RETURN(dev_priv);
2567
2568	buf = dma->buflist[vertex->idx];
2569
2570	if (buf->file_priv != file_priv) {
2571		DRM_ERROR("process %d using buffer owned by %p\n",
2572			  DRM_CURRENTPID, buf->file_priv);
2573		return -EINVAL;
2574	}
2575
2576	if (buf->pending) {
2577		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2578		return -EINVAL;
2579	}
2580
2581	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2582		return -EINVAL;
2583
2584	for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
2585		drm_radeon_prim_t prim;
2586		drm_radeon_tcl_prim_t tclprim;
2587
2588		if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
2589			return -EFAULT;
2590
2591		if (prim.stateidx != laststate) {
2592			drm_radeon_state_t state;
2593
2594			if (DRM_COPY_FROM_USER(&state,
2595					       &vertex->state[prim.stateidx],
2596					       sizeof(state)))
2597				return -EFAULT;
2598
2599			if (radeon_emit_state2(dev_priv, file_priv, &state)) {
2600				DRM_ERROR("radeon_emit_state2 failed\n");
2601				return -EINVAL;
2602			}
2603
2604			laststate = prim.stateidx;
2605		}
2606
2607		tclprim.start = prim.start;
2608		tclprim.finish = prim.finish;
2609		tclprim.prim = prim.prim;
2610		tclprim.vc_format = prim.vc_format;
2611
2612		if (prim.prim & RADEON_PRIM_WALK_IND) {
2613			tclprim.offset = prim.numverts * 64;
2614			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
2615
2616			radeon_cp_dispatch_indices(dev, file_priv->master, buf, &tclprim);
2617		} else {
2618			tclprim.numverts = prim.numverts;
2619			tclprim.offset = 0;	/* not used */
2620
2621			radeon_cp_dispatch_vertex(dev, file_priv, buf, &tclprim);
2622		}
2623
2624		if (sarea_priv->nbox == 1)
2625			sarea_priv->nbox = 0;
2626	}
2627
2628	if (vertex->discard) {
2629		radeon_cp_discard_buffer(dev, file_priv->master, buf);
2630	}
2631
2632	COMMIT_RING();
2633	return 0;
2634}
2635
2636static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2637			       struct drm_file *file_priv,
2638			       drm_radeon_cmd_header_t header,
2639			       drm_radeon_kcmd_buffer_t *cmdbuf)
2640{
2641	int id = (int)header.packet.packet_id;
2642	int sz, reg;
2643	RING_LOCALS;
2644
2645	if (id >= RADEON_MAX_STATE_PACKETS)
2646		return -EINVAL;
2647
2648	sz = packet[id].len;
2649	reg = packet[id].start;
2650
2651	if (sz * sizeof(u32) > drm_buffer_unprocessed(cmdbuf->buffer)) {
2652		DRM_ERROR("Packet size provided larger than data provided\n");
2653		return -EINVAL;
2654	}
2655
2656	if (radeon_check_and_fixup_packets(dev_priv, file_priv, id,
2657				cmdbuf->buffer)) {
2658		DRM_ERROR("Packet verification failed\n");
2659		return -EINVAL;
2660	}
2661
2662	BEGIN_RING(sz + 1);
2663	OUT_RING(CP_PACKET0(reg, (sz - 1)));
2664	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2665	ADVANCE_RING();
2666
2667	return 0;
2668}
2669
2670static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2671					  drm_radeon_cmd_header_t header,
2672					  drm_radeon_kcmd_buffer_t *cmdbuf)
2673{
2674	int sz = header.scalars.count;
2675	int start = header.scalars.offset;
2676	int stride = header.scalars.stride;
2677	RING_LOCALS;
2678
2679	BEGIN_RING(3 + sz);
2680	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2681	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2682	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2683	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2684	ADVANCE_RING();
2685	return 0;
2686}
2687
2688/* God this is ugly
2689 */
2690static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2691					   drm_radeon_cmd_header_t header,
2692					   drm_radeon_kcmd_buffer_t *cmdbuf)
2693{
2694	int sz = header.scalars.count;
2695	int start = ((unsigned int)header.scalars.offset) + 0x100;
2696	int stride = header.scalars.stride;
2697	RING_LOCALS;
2698
2699	BEGIN_RING(3 + sz);
2700	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2701	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2702	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2703	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2704	ADVANCE_RING();
2705	return 0;
2706}
2707
2708static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2709					  drm_radeon_cmd_header_t header,
2710					  drm_radeon_kcmd_buffer_t *cmdbuf)
2711{
2712	int sz = header.vectors.count;
2713	int start = header.vectors.offset;
2714	int stride = header.vectors.stride;
2715	RING_LOCALS;
2716
2717	BEGIN_RING(5 + sz);
2718	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2719	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2720	OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2721	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2722	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2723	ADVANCE_RING();
2724
2725	return 0;
2726}
2727
2728static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2729					  drm_radeon_cmd_header_t header,
2730					  drm_radeon_kcmd_buffer_t *cmdbuf)
2731{
2732	int sz = header.veclinear.count * 4;
2733	int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2734	RING_LOCALS;
2735
2736        if (!sz)
2737                return 0;
2738	if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
2739                return -EINVAL;
2740
2741	BEGIN_RING(5 + sz);
2742	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2743	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2744	OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2745	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2746	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2747	ADVANCE_RING();
2748
2749	return 0;
2750}
2751
2752static int radeon_emit_packet3(struct drm_device * dev,
2753			       struct drm_file *file_priv,
2754			       drm_radeon_kcmd_buffer_t *cmdbuf)
2755{
2756	drm_radeon_private_t *dev_priv = dev->dev_private;
2757	unsigned int cmdsz;
2758	int ret;
2759	RING_LOCALS;
2760
2761	DRM_DEBUG("\n");
2762
2763	if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2764						  cmdbuf, &cmdsz))) {
2765		DRM_ERROR("Packet verification failed\n");
2766		return ret;
2767	}
2768
2769	BEGIN_RING(cmdsz);
2770	OUT_RING_DRM_BUFFER(cmdbuf->buffer, cmdsz);
2771	ADVANCE_RING();
2772
2773	return 0;
2774}
2775
/* Emit one type-3 packet from the command buffer once per cliprect.
 *
 * The packet is verified first.  When @orig_nbox is zero nothing is emitted
 * and the packet's bytes are merely skipped in the command buffer.
 * Otherwise the loop body runs at least once: for every index below
 * cmdbuf->nbox the box is copied from user space and emitted as the clip
 * rectangle before the packet is written to the ring; when cmdbuf->nbox is
 * already 0 the packet is written once without a clip rectangle.  A
 * cmdbuf->nbox of exactly 1 is reset to 0 afterwards.
 *
 * Returns 0 on success, the verifier's non-zero result when verification
 * fails, or -EFAULT when a box cannot be copied in.
 *
 * NOTE(review): OUT_RING_DRM_BUFFER is defined outside this chunk; confirm
 * whether it consumes cmdbuf->buffer, since the same packet is expected to
 * be written on every iteration of the loop.
 */
static int radeon_emit_packet3_cliprect(struct drm_device *dev,
					struct drm_file *file_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_clip_rect box;
	unsigned int cmdsz;
	int ret;
	struct drm_clip_rect __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	/* No cliprects were supplied with the command buffer: skip the
	 * packet instead of drawing it.
	 */
	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return -EFAULT;
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		BEGIN_RING(cmdsz);
		OUT_RING_DRM_BUFFER(cmdbuf->buffer, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	/* A single cliprect has now been emitted; clearing the count makes
	 * later calls take the no-cliprect path of the loop above.
	 */
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

	return 0;
      out:
	/* cmdsz is in dwords, the buffer position is in bytes. */
	drm_buffer_advance(cmdbuf->buffer, cmdsz * 4);
	return 0;
}
2837
2838static int radeon_emit_wait(struct drm_device * dev, int flags)
2839{
2840	drm_radeon_private_t *dev_priv = dev->dev_private;
2841	RING_LOCALS;
2842
2843	DRM_DEBUG("%x\n", flags);
2844	switch (flags) {
2845	case RADEON_WAIT_2D:
2846		BEGIN_RING(2);
2847		RADEON_WAIT_UNTIL_2D_IDLE();
2848		ADVANCE_RING();
2849		break;
2850	case RADEON_WAIT_3D:
2851		BEGIN_RING(2);
2852		RADEON_WAIT_UNTIL_3D_IDLE();
2853		ADVANCE_RING();
2854		break;
2855	case RADEON_WAIT_2D | RADEON_WAIT_3D:
2856		BEGIN_RING(2);
2857		RADEON_WAIT_UNTIL_IDLE();
2858		ADVANCE_RING();
2859		break;
2860	default:
2861		return -EINVAL;
2862	}
2863
2864	return 0;
2865}
2866
2867static int radeon_cp_cmdbuf(struct drm_device *dev, void *data,
2868		struct drm_file *file_priv)
2869{
2870	drm_radeon_private_t *dev_priv = dev->dev_private;
2871	struct drm_device_dma *dma = dev->dma;
2872	struct drm_buf *buf = NULL;
2873	drm_radeon_cmd_header_t stack_header;
2874	int idx;
2875	drm_radeon_kcmd_buffer_t *cmdbuf = data;
2876	int orig_nbox;
2877
2878	LOCK_TEST_WITH_RETURN(dev, file_priv);
2879
2880	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2881	VB_AGE_TEST_WITH_RETURN(dev_priv);
2882
2883	if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
2884		return -EINVAL;
2885	}
2886
2887	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2888	 * races between checking values and using those values in other code,
2889	 * and simply to avoid a lot of function calls to copy in data.
2890	 */
2891	if (cmdbuf->bufsz != 0) {
2892		int rv;
2893		void __user *buffer = cmdbuf->buffer;
2894		rv = drm_buffer_alloc(&cmdbuf->buffer, cmdbuf->bufsz);
2895		if (rv)
2896			return rv;
2897		rv = drm_buffer_copy_from_user(cmdbuf->buffer, buffer,
2898						cmdbuf->bufsz);
2899		if (rv) {
2900			drm_buffer_free(cmdbuf->buffer);
2901			return rv;
2902		}
2903	} else
2904		goto done;
2905
2906	orig_nbox = cmdbuf->nbox;
2907
2908	if (dev_priv->microcode_version == UCODE_R300) {
2909		int temp;
2910		temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
2911
2912		drm_buffer_free(cmdbuf->buffer);
2913
2914		return temp;
2915	}
2916
2917	/* microcode_version != r300 */
2918	while (drm_buffer_unprocessed(cmdbuf->buffer) >= sizeof(stack_header)) {
2919
2920		drm_radeon_cmd_header_t *header;
2921		header = drm_buffer_read_object(cmdbuf->buffer,
2922				sizeof(stack_header), &stack_header);
2923
2924		switch (header->header.cmd_type) {
2925		case RADEON_CMD_PACKET:
2926			DRM_DEBUG("RADEON_CMD_PACKET\n");
2927			if (radeon_emit_packets
2928			    (dev_priv, file_priv, *header, cmdbuf)) {
2929				DRM_ERROR("radeon_emit_packets failed\n");
2930				goto err;
2931			}
2932			break;
2933
2934		case RADEON_CMD_SCALARS:
2935			DRM_DEBUG("RADEON_CMD_SCALARS\n");
2936			if (radeon_emit_scalars(dev_priv, *header, cmdbuf)) {
2937				DRM_ERROR("radeon_emit_scalars failed\n");
2938				goto err;
2939			}
2940			break;
2941
2942		case RADEON_CMD_VECTORS:
2943			DRM_DEBUG("RADEON_CMD_VECTORS\n");
2944			if (radeon_emit_vectors(dev_priv, *header, cmdbuf)) {
2945				DRM_ERROR("radeon_emit_vectors failed\n");
2946				goto err;
2947			}
2948			break;
2949
2950		case RADEON_CMD_DMA_DISCARD:
2951			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2952			idx = header->dma.buf_idx;
2953			if (idx < 0 || idx >= dma->buf_count) {
2954				DRM_ERROR("buffer index %d (of %d max)\n",
2955					  idx, dma->buf_count - 1);
2956				goto err;
2957			}
2958
2959			buf = dma->buflist[idx];
2960			if (buf->file_priv != file_priv || buf->pending) {
2961				DRM_ERROR("bad buffer %p %p %d\n",
2962					  buf->file_priv, file_priv,
2963					  buf->pending);
2964				goto err;
2965			}
2966
2967			radeon_cp_discard_buffer(dev, file_priv->master, buf);
2968			break;
2969
2970		case RADEON_CMD_PACKET3:
2971			DRM_DEBUG("RADEON_CMD_PACKET3\n");
2972			if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
2973				DRM_ERROR("radeon_emit_packet3 failed\n");
2974				goto err;
2975			}
2976			break;
2977
2978		case RADEON_CMD_PACKET3_CLIP:
2979			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2980			if (radeon_emit_packet3_cliprect
2981			    (dev, file_priv, cmdbuf, orig_nbox)) {
2982				DRM_ERROR("radeon_emit_packet3_clip failed\n");
2983				goto err;
2984			}
2985			break;
2986
2987		case RADEON_CMD_SCALARS2:
2988			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2989			if (radeon_emit_scalars2(dev_priv, *header, cmdbuf)) {
2990				DRM_ERROR("radeon_emit_scalars2 failed\n");
2991				goto err;
2992			}
2993			break;
2994
2995		case RADEON_CMD_WAIT:
2996			DRM_DEBUG("RADEON_CMD_WAIT\n");
2997			if (radeon_emit_wait(dev, header->wait.flags)) {
2998				DRM_ERROR("radeon_emit_wait failed\n");
2999				goto err;
3000			}
3001			break;
3002		case RADEON_CMD_VECLINEAR:
3003			DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
3004			if (radeon_emit_veclinear(dev_priv, *header, cmdbuf)) {
3005				DRM_ERROR("radeon_emit_veclinear failed\n");
3006				goto err;
3007			}
3008			break;
3009
3010		default:
3011			DRM_ERROR("bad cmd_type %d at byte %d\n",
3012				  header->header.cmd_type,
3013				  cmdbuf->buffer->iterator);
3014			goto err;
3015		}
3016	}
3017
3018	drm_buffer_free(cmdbuf->buffer);
3019
3020      done:
3021	DRM_DEBUG("DONE\n");
3022	COMMIT_RING();
3023	return 0;
3024
3025      err:
3026	drm_buffer_free(cmdbuf->buffer);
3027	return -EINVAL;
3028}
3029
3030static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3031{
3032	drm_radeon_private_t *dev_priv = dev->dev_private;
3033	drm_radeon_getparam_t *param = data;
3034	int value;
3035
3036	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3037
3038	switch (param->param) {
3039	case RADEON_PARAM_GART_BUFFER_OFFSET:
3040		value = dev_priv->gart_buffers_offset;
3041		break;
3042	case RADEON_PARAM_LAST_FRAME:
3043		dev_priv->stats.last_frame_reads++;
3044		value = GET_SCRATCH(dev_priv, 0);
3045		break;
3046	case RADEON_PARAM_LAST_DISPATCH:
3047		value = GET_SCRATCH(dev_priv, 1);
3048		break;
3049	case RADEON_PARAM_LAST_CLEAR:
3050		dev_priv->stats.last_clear_reads++;
3051		value = GET_SCRATCH(dev_priv, 2);
3052		break;
3053	case RADEON_PARAM_IRQ_NR:
3054		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3055			value = 0;
3056		else
3057			value = drm_dev_to_irq(dev);
3058		break;
3059	case RADEON_PARAM_GART_BASE:
3060		value = dev_priv->gart_vm_start;
3061		break;
3062	case RADEON_PARAM_REGISTER_HANDLE:
3063		value = dev_priv->mmio->offset;
3064		break;
3065	case RADEON_PARAM_STATUS_HANDLE:
3066		value = dev_priv->ring_rptr_offset;
3067		break;
3068#if BITS_PER_LONG == 32
3069		/*
3070		 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3071		 * pointer which can't fit into an int-sized variable.  According to
3072		 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3073		 * not supporting it shouldn't be a problem.  If the same functionality
3074		 * is needed on 64-bit platforms, a new ioctl() would have to be added,
3075		 * so backwards-compatibility for the embedded platforms can be
3076		 * maintained.  --davidm 4-Feb-2004.
3077		 */
3078	case RADEON_PARAM_SAREA_HANDLE:
3079		/* The lock is the first dword in the sarea. */
3080		/* no users of this parameter */
3081		break;
3082#endif
3083	case RADEON_PARAM_GART_TEX_HANDLE:
3084		value = dev_priv->gart_textures_offset;
3085		break;
3086	case RADEON_PARAM_SCRATCH_OFFSET:
3087		if (!dev_priv->writeback_works)
3088			return -EINVAL;
3089		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3090			value = R600_SCRATCH_REG_OFFSET;
3091		else
3092			value = RADEON_SCRATCH_REG_OFFSET;
3093		break;
3094	case RADEON_PARAM_CARD_TYPE:
3095		if (dev_priv->flags & RADEON_IS_PCIE)
3096			value = RADEON_CARD_PCIE;
3097		else if (dev_priv->flags & RADEON_IS_AGP)
3098			value = RADEON_CARD_AGP;
3099		else
3100			value = RADEON_CARD_PCI;
3101		break;
3102	case RADEON_PARAM_VBLANK_CRTC:
3103		value = radeon_vblank_crtc_get(dev);
3104		break;
3105	case RADEON_PARAM_FB_LOCATION:
3106		value = radeon_read_fb_location(dev_priv);
3107		break;
3108	case RADEON_PARAM_NUM_GB_PIPES:
3109		value = dev_priv->num_gb_pipes;
3110		break;
3111	case RADEON_PARAM_NUM_Z_PIPES:
3112		value = dev_priv->num_z_pipes;
3113		break;
3114	default:
3115		DRM_DEBUG("Invalid parameter %d\n", param->param);
3116		return -EINVAL;
3117	}
3118
3119	if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
3120		DRM_ERROR("copy_to_user\n");
3121		return -EFAULT;
3122	}
3123
3124	return 0;
3125}
3126
3127static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3128{
3129	drm_radeon_private_t *dev_priv = dev->dev_private;
3130	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
3131	drm_radeon_setparam_t *sp = data;
3132	struct drm_radeon_driver_file_fields *radeon_priv;
3133
3134	switch (sp->param) {
3135	case RADEON_SETPARAM_FB_LOCATION:
3136		radeon_priv = file_priv->driver_priv;
3137		radeon_priv->radeon_fb_delta = dev_priv->fb_location -
3138		    sp->value;
3139		break;
3140	case RADEON_SETPARAM_SWITCH_TILING:
3141		if (sp->value == 0) {
3142			DRM_DEBUG("color tiling disabled\n");
3143			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3144			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3145			if (master_priv->sarea_priv)
3146				master_priv->sarea_priv->tiling_enabled = 0;
3147		} else if (sp->value == 1) {
3148			DRM_DEBUG("color tiling enabled\n");
3149			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3150			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3151			if (master_priv->sarea_priv)
3152				master_priv->sarea_priv->tiling_enabled = 1;
3153		}
3154		break;
3155	case RADEON_SETPARAM_PCIGART_LOCATION:
3156		dev_priv->pcigart_offset = sp->value;
3157		dev_priv->pcigart_offset_set = 1;
3158		break;
3159	case RADEON_SETPARAM_NEW_MEMMAP:
3160		dev_priv->new_memmap = sp->value;
3161		break;
3162	case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3163		dev_priv->gart_info.table_size = sp->value;
3164		if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3165			dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3166		break;
3167	case RADEON_SETPARAM_VBLANK_CRTC:
3168		return radeon_vblank_crtc_set(dev, sp->value);
3169		break;
3170	default:
3171		DRM_DEBUG("Invalid parameter %d\n", sp->param);
3172		return -EINVAL;
3173	}
3174
3175	return 0;
3176}
3177
3178/* When a client dies:
3179 *    - Check for and clean up flipped page state
3180 *    - Free any alloced GART memory.
3181 *    - Free any alloced radeon surfaces.
3182 *
3183 * DRM infrastructure takes care of reclaiming dma buffers.
3184 */
3185void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
3186{
3187	if (dev->dev_private) {
3188		drm_radeon_private_t *dev_priv = dev->dev_private;
3189		dev_priv->page_flipping = 0;
3190		radeon_mem_release(file_priv, dev_priv->gart_heap);
3191		radeon_mem_release(file_priv, dev_priv->fb_heap);
3192		radeon_surfaces_release(file_priv, dev_priv);
3193	}
3194}
3195
3196void radeon_driver_lastclose(struct drm_device *dev)
3197{
3198	radeon_surfaces_release(PCIGART_FILE_PRIV, dev->dev_private);
3199	radeon_do_release(dev);
3200}
3201
3202int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
3203{
3204	drm_radeon_private_t *dev_priv = dev->dev_private;
3205	struct drm_radeon_driver_file_fields *radeon_priv;
3206
3207	DRM_DEBUG("\n");
3208	radeon_priv = kmalloc(sizeof(*radeon_priv), GFP_KERNEL);
3209
3210	if (!radeon_priv)
3211		return -ENOMEM;
3212
3213	file_priv->driver_priv = radeon_priv;
3214
3215	if (dev_priv)
3216		radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3217	else
3218		radeon_priv->radeon_fb_delta = 0;
3219	return 0;
3220}
3221
3222void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
3223{
3224	struct drm_radeon_driver_file_fields *radeon_priv =
3225	    file_priv->driver_priv;
3226
3227	kfree(radeon_priv);
3228}
3229
/*
 * Driver ioctl table.  Each DRM_IOCTL_DEF_DRV() entry names a RADEON_*
 * ioctl, the function that handles it and the DRM_* flags attached to it.
 * NOTE(review): the flags presumably encode the access requirements
 * (authenticated client, master, root only) enforced by the DRM core --
 * confirm against the DRM_IOCTL_DEF_DRV definition.
 */
struct drm_ioctl_desc radeon_ioctls[] = {
	DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF_DRV(RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF_DRV(RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF_DRV(RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF_DRV(RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_RESET, radeon_engine_reset, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF_DRV(RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_FREE, radeon_mem_free, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF_DRV(RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH)
};
3260
/* Number of entries in radeon_ioctls[], as computed by DRM_ARRAY_SIZE(). */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);
3262