r300.c revision 531369e62649bb8f31217cc0bf33ee6f89f1dff6
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/seq_file.h>
#include "drmP.h"
#include "drm.h"
#include "radeon_reg.h"
#include "radeon.h"

/* r300,r350,rv350,rv370,rv380 depend on: */
void r100_hdp_reset(struct radeon_device *rdev);
int r100_cp_reset(struct radeon_device *rdev);
int r100_rb2d_reset(struct radeon_device *rdev);
int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
int r100_pci_gart_enable(struct radeon_device *rdev);
void r100_pci_gart_disable(struct radeon_device *rdev);
void r100_mc_setup(struct radeon_device *rdev);
void r100_mc_disable_clients(struct radeon_device *rdev);
int r100_gui_wait_for_idle(struct radeon_device *rdev);
int r100_cs_packet_parse(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt,
			 unsigned idx);
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p);
int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
			      struct radeon_cs_reloc **cs_reloc);
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
			  struct radeon_cs_packet *pkt,
			  const unsigned *auth, unsigned n,
			  radeon_packet0_check_t check);
void r100_cs_dump_packet(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt);
int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
					 struct radeon_cs_packet *pkt,
					 struct radeon_object *robj);

/* This file gathers functions specific to:
 * r300,r350,rv350,rv370,rv380
 *
 * Some of these functions might be used by newer ASICs.
 */
void r300_gpu_init(struct radeon_device *rdev);
int r300_mc_wait_for_idle(struct radeon_device *rdev);
int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev);


/*
 * rv370,rv380 PCIE GART
 */
void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	uint32_t tmp;
	int i;

	/* Workaround HW bug: do the flush twice */
	for (i = 0; i < 2; i++) {
		tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
		WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp | RADEON_PCIE_TX_GART_INVALIDATE_TLB);
		(void)RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
		WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
		mb();
	}
}

int rv370_pcie_gart_enable(struct radeon_device *rdev)
{
	uint32_t table_addr;
	uint32_t tmp;
	int r;

	/* Initialize common gart structure */
	r = radeon_gart_init(rdev);
	if (r) {
		return r;
	}
	r = rv370_debugfs_pcie_gart_info_init(rdev);
	if (r) {
		DRM_ERROR("Failed to register debugfs file for PCIE gart!\n");
	}
	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
	r = radeon_gart_table_vram_alloc(rdev);
	if (r) {
		return r;
	}
	/* discard memory requests outside of the configured range */
	tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
	WREG32_PCIE(RADEON_PCIE_TX_GART_START_LO, rdev->mc.gtt_location);
	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 4096;
	WREG32_PCIE(RADEON_PCIE_TX_GART_END_LO, tmp);
	WREG32_PCIE(RADEON_PCIE_TX_GART_START_HI, 0);
	WREG32_PCIE(RADEON_PCIE_TX_GART_END_HI, 0);
	table_addr = rdev->gart.table_addr;
	WREG32_PCIE(RADEON_PCIE_TX_GART_BASE, table_addr);
	/* FIXME: setup default page */
	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO, rdev->mc.vram_location);
	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_HI, 0);
	/* Clear error */
	WREG32_PCIE(0x18, 0);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
	tmp |= RADEON_PCIE_TX_GART_EN;
	tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
	rv370_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
		 rdev->mc.gtt_size >> 20, table_addr);
	rdev->gart.ready = true;
	return 0;
}

void rv370_pcie_gart_disable(struct radeon_device *rdev)
{
	uint32_t tmp;

	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
	tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp & ~RADEON_PCIE_TX_GART_EN);
	if (rdev->gart.table.vram.robj) {
		radeon_object_kunmap(rdev->gart.table.vram.robj);
		radeon_object_unpin(rdev->gart.table.vram.robj);
	}
}

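/* Each GART entry is one little-endian dword encoding a 40-bit bus
 * address: bits [31:8] of the low address dword land in entry bits
 * [23:0], address bits [39:32] go in entry bits [31:24], and the low
 * nibble (0xc here) carries per-entry flag bits whose exact meaning
 * this file does not spell out.
 */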
int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
	void __iomem *ptr = (void __iomem *)rdev->gart.table.vram.ptr;

	if (i < 0 || i >= rdev->gart.num_gpu_pages) {
		return -EINVAL;
	}
	addr = (lower_32_bits(addr) >> 8) |
	       ((upper_32_bits(addr) & 0xff) << 24) |
	       0xc;
	writel(cpu_to_le32(addr), ptr + (i * 4));
	return 0;
}

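/* Select the GART backend at enable time: AGP boards only need the
 * on-chip GARTs disabled (PCIE GART on > RV350, PCI GART otherwise),
 * PCIE boards get the rv370 PCIE GART hooks installed, and everything
 * else falls back to the r100 PCI GART.
 */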
int r300_gart_enable(struct radeon_device *rdev)
{
#if __OS_HAS_AGP
	if (rdev->flags & RADEON_IS_AGP) {
		if (rdev->family > CHIP_RV350) {
			rv370_pcie_gart_disable(rdev);
		} else {
			r100_pci_gart_disable(rdev);
		}
		return 0;
	}
#endif
	if (rdev->flags & RADEON_IS_PCIE) {
		rdev->asic->gart_disable = &rv370_pcie_gart_disable;
		rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush;
		rdev->asic->gart_set_page = &rv370_pcie_gart_set_page;
		return rv370_pcie_gart_enable(rdev);
	}
	return r100_pci_gart_enable(rdev);
}


/*
 * MC
 */
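/* 0xFFFFFFFF in vram_location/gtt_location asks radeon_mc_setup() to
 * pick the placement itself; a successful AGP init instead pins the
 * GTT at the AGP aperture base.
 */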
int r300_mc_init(struct radeon_device *rdev)
{
	int r;

	if (r100_debugfs_rbbm_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for RBBM!\n");
	}

	r300_gpu_init(rdev);
	r100_pci_gart_disable(rdev);
	if (rdev->flags & RADEON_IS_PCIE) {
		rv370_pcie_gart_disable(rdev);
	}

	/* Setup GPU memory space */
	rdev->mc.vram_location = 0xFFFFFFFFUL;
	rdev->mc.gtt_location = 0xFFFFFFFFUL;
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			printk(KERN_WARNING "[drm] Disabling AGP\n");
			rdev->flags &= ~RADEON_IS_AGP;
			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
		} else {
			rdev->mc.gtt_location = rdev->mc.agp_base;
		}
	}
	r = radeon_mc_setup(rdev);
	if (r) {
		return r;
	}

	/* Program GPU memory space */
	r100_mc_disable_clients(rdev);
	if (r300_mc_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait for MC idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	r100_mc_setup(rdev);
	return 0;
}

void r300_mc_fini(struct radeon_device *rdev)
{
	if (rdev->flags & RADEON_IS_PCIE) {
		rv370_pcie_gart_disable(rdev);
		radeon_gart_table_vram_free(rdev);
	} else {
		r100_pci_gart_disable(rdev);
		radeon_gart_table_ram_free(rdev);
	}
	radeon_gart_fini(rdev);
}


/*
 * Fence emission
 */
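/* The scratch register write carrying the fence sequence must land
 * only after the 3D pipe has gone idle and its caches are flushed,
 * otherwise the fence could signal before rendering actually reached
 * memory.
 */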
void r300_fence_ring_emit(struct radeon_device *rdev,
			  struct radeon_fence *fence)
{
	/* Whoever calls radeon_fence_emit should call ring_lock and ask
	 * for enough space (today callers are ib schedule and buffer move) */
	/* Write SC register so SC & US assert idle */
	radeon_ring_write(rdev, PACKET0(0x43E0, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(0x43E4, 0));
	radeon_ring_write(rdev, 0);
	/* Flush 3D cache */
	radeon_ring_write(rdev, PACKET0(0x4E4C, 0));
	radeon_ring_write(rdev, (2 << 0));
	radeon_ring_write(rdev, PACKET0(0x4F18, 0));
	radeon_ring_write(rdev, (1 << 0));
	/* Wait until IDLE & CLEAN */
	radeon_ring_write(rdev, PACKET0(0x1720, 0));
	radeon_ring_write(rdev, (1 << 17) | (1 << 16) | (1 << 9));
	/* Emit fence sequence & fire IRQ */
	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
	radeon_ring_write(rdev, fence->seq);
	radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
}


/*
 * Global GPU functions
 */
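/* The CP DMA packet carries the byte count in its low bits (bits 30
 * and 31 are flags), so a copy is split into chunks of at most
 * 0x1FFFFF bytes each.
 */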
int r300_copy_dma(struct radeon_device *rdev,
		  uint64_t src_offset,
		  uint64_t dst_offset,
		  unsigned num_pages,
		  struct radeon_fence *fence)
{
	uint32_t size;
	uint32_t cur_size;
	int i, num_loops;
	int r = 0;

	/* radeon pitch is /64 */
	size = num_pages << PAGE_SHIFT;
	num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
	r = radeon_ring_lock(rdev, num_loops * 4 + 64);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}
	/* Must wait for 2D idle & clean before DMA or hangs might happen */
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev, (1 << 16));
	for (i = 0; i < num_loops; i++) {
		cur_size = size;
		if (cur_size > 0x1FFFFF) {
			cur_size = 0x1FFFFF;
		}
		size -= cur_size;
		radeon_ring_write(rdev, PACKET0(0x720, 2));
		radeon_ring_write(rdev, src_offset);
		radeon_ring_write(rdev, dst_offset);
		radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
		src_offset += cur_size;
		dst_offset += cur_size;
	}
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
	if (fence) {
		r = radeon_fence_emit(rdev, fence);
	}
	radeon_ring_unlock_commit(rdev);
	return r;
}

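/* Load the boot-time state the CP ring needs: the pipe count in
 * GB_TILE_CONFIG, isync and wait-until defaults, dst/z cache flushes
 * and the multisample sample positions.
 */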
void r300_ring_start(struct radeon_device *rdev)
{
	unsigned gb_tile_config;
	int r;

	/* Sub pixel 1/12 so we can have 4K rendering according to doc */
	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
	switch (rdev->num_gb_pipes) {
	case 2:
		gb_tile_config |= R300_PIPE_COUNT_R300;
		break;
	case 3:
		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
		break;
	case 4:
		gb_tile_config |= R300_PIPE_COUNT_R420;
		break;
	case 1:
	default:
		gb_tile_config |= R300_PIPE_COUNT_RV350;
		break;
	}

	r = radeon_ring_lock(rdev, 64);
	if (r) {
		return;
	}
	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
	radeon_ring_write(rdev,
			  RADEON_ISYNC_ANY2D_IDLE3D |
			  RADEON_ISYNC_ANY3D_IDLE2D |
			  RADEON_ISYNC_WAIT_IDLEGUI |
			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
	radeon_ring_write(rdev, PACKET0(R300_GB_TILE_CONFIG, 0));
	radeon_ring_write(rdev, gb_tile_config);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_3D_IDLECLEAN);
	radeon_ring_write(rdev, PACKET0(0x170C, 0));
	radeon_ring_write(rdev, 1 << 31);
	radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_3D_IDLECLEAN);
	radeon_ring_write(rdev, PACKET0(R300_GB_AA_CONFIG, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS0, 0));
	radeon_ring_write(rdev,
			  ((6 << R300_MS_X0_SHIFT) |
			   (6 << R300_MS_Y0_SHIFT) |
			   (6 << R300_MS_X1_SHIFT) |
			   (6 << R300_MS_Y1_SHIFT) |
			   (6 << R300_MS_X2_SHIFT) |
			   (6 << R300_MS_Y2_SHIFT) |
			   (6 << R300_MSBD0_Y_SHIFT) |
			   (6 << R300_MSBD0_X_SHIFT)));
	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS1, 0));
	radeon_ring_write(rdev,
			  ((6 << R300_MS_X3_SHIFT) |
			   (6 << R300_MS_Y3_SHIFT) |
			   (6 << R300_MS_X4_SHIFT) |
			   (6 << R300_MS_Y4_SHIFT) |
			   (6 << R300_MS_X5_SHIFT) |
			   (6 << R300_MS_Y5_SHIFT) |
			   (6 << R300_MSBD1_SHIFT)));
	radeon_ring_write(rdev, PACKET0(R300_GA_ENHANCE, 0));
	radeon_ring_write(rdev, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL);
	radeon_ring_write(rdev, PACKET0(R300_GA_POLY_MODE, 0));
	radeon_ring_write(rdev,
			  R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE);
	radeon_ring_write(rdev, PACKET0(R300_GA_ROUND_MODE, 0));
	radeon_ring_write(rdev,
			  R300_GEOMETRY_ROUND_NEAREST |
			  R300_COLOR_ROUND_NEAREST);
	radeon_ring_unlock_commit(rdev);
}

void r300_errata(struct radeon_device *rdev)
{
	rdev->pll_errata = 0;

	if (rdev->family == CHIP_R300 &&
	    (RREG32(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) == RADEON_CFG_ATI_REV_A11) {
		rdev->pll_errata |= CHIP_ERRATA_R300_CG;
	}
}

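/* Poll MC_STATUS (0x0150) until the MC idle bit (bit 4) is set;
 * gives up after rdev->usec_timeout 1us polls and returns -1.
 */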
int r300_mc_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		/* read MC_STATUS */
		tmp = RREG32(0x0150);
		if (tmp & (1 << 4)) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

void r300_gpu_init(struct radeon_device *rdev)
{
	uint32_t gb_tile_config, tmp;

	r100_hdp_reset(rdev);
	/* FIXME: does rv380 have one pipe? */
	if ((rdev->family == CHIP_R300) || (rdev->family == CHIP_R350)) {
		/* r300,r350 */
		rdev->num_gb_pipes = 2;
	} else {
		/* rv350,rv370,rv380 */
		rdev->num_gb_pipes = 1;
	}
	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
	switch (rdev->num_gb_pipes) {
	case 2:
		gb_tile_config |= R300_PIPE_COUNT_R300;
		break;
	case 3:
		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
		break;
	case 4:
		gb_tile_config |= R300_PIPE_COUNT_R420;
		break;
	default:
	case 1:
		gb_tile_config |= R300_PIPE_COUNT_RV350;
		break;
	}
	WREG32(R300_GB_TILE_CONFIG, gb_tile_config);

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait for GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	tmp = RREG32(0x170C);
	WREG32(0x170C, tmp | (1 << 31));

	WREG32(R300_RB2D_DSTCACHE_MODE,
	       R300_DC_AUTOFLUSH_ENABLE |
	       R300_DC_DC_DISABLE_IGNORE_PE);

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait for GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	if (r300_mc_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait for MC idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	DRM_INFO("radeon: %d pipes initialized.\n", rdev->num_gb_pipes);
}

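/* Soft reset the GA: stop the CP, pulse RBBM_SOFT_RESET, and if the
 * VAP/CP path still reports busy in RBBM_STATUS, poke the GA flush
 * registers and retry until the timeout expires.  The CP is
 * reinitialized afterwards if it was running when we entered.
 */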
int r300_ga_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	bool reinit_cp;
	int i;

	reinit_cp = rdev->cp.ready;
	rdev->cp.ready = false;
	for (i = 0; i < rdev->usec_timeout; i++) {
		WREG32(RADEON_CP_CSQ_MODE, 0);
		WREG32(RADEON_CP_CSQ_CNTL, 0);
		WREG32(RADEON_RBBM_SOFT_RESET, 0x32005);
		(void)RREG32(RADEON_RBBM_SOFT_RESET);
		udelay(200);
		WREG32(RADEON_RBBM_SOFT_RESET, 0);
		/* Wait to prevent race in RBBM_STATUS */
		mdelay(1);
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (tmp & ((1 << 20) | (1 << 26))) {
			DRM_ERROR("VAP & CP still busy (RBBM_STATUS=0x%08X)\n", tmp);
			/* GA still busy, soft reset it */
			WREG32(0x429C, 0x200);
			WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
			WREG32(0x43E0, 0);
			WREG32(0x43E4, 0);
			WREG32(0x24AC, 0);
		}
		/* Wait to prevent race in RBBM_STATUS */
		mdelay(1);
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & ((1 << 20) | (1 << 26)))) {
			break;
		}
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & ((1 << 20) | (1 << 26)))) {
			DRM_INFO("GA reset succeeded (RBBM_STATUS=0x%08X)\n",
				 tmp);
			if (reinit_cp) {
				return r100_cp_init(rdev, rdev->cp.ring_size);
			}
			return 0;
		}
		DRM_UDELAY(1);
	}
	tmp = RREG32(RADEON_RBBM_STATUS);
	DRM_ERROR("Failed to reset GA! (RBBM_STATUS=0x%08X)\n", tmp);
	return -1;
}

int r300_gpu_reset(struct radeon_device *rdev)
{
	uint32_t status;

	/* reset order likely matters */
	status = RREG32(RADEON_RBBM_STATUS);
	/* reset HDP */
	r100_hdp_reset(rdev);
	/* reset rb2d */
	if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
		r100_rb2d_reset(rdev);
	}
	/* reset GA */
	if (status & ((1 << 20) | (1 << 26))) {
		r300_ga_reset(rdev);
	}
	/* reset CP */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 16)) {
		r100_cp_reset(rdev);
	}
	/* Check if GPU is idle */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 31)) {
		DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
		return -1;
	}
	DRM_INFO("GPU reset succeeded (RBBM_STATUS=0x%08X)\n", status);
	return 0;
}


/*
 * r300,r350,rv350,rv380 VRAM info
 */
void r300_vram_info(struct radeon_device *rdev)
{
	uint32_t tmp;

	/* DDR for all cards after R300 & IGP */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(RADEON_MEM_CNTL);
	if (tmp & R300_MEM_NUM_CHANNELS_MASK) {
		rdev->mc.vram_width = 128;
	} else {
		rdev->mc.vram_width = 64;
	}
	rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);

	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
}


/*
 * Indirect registers accessor
 */
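/* PCIE registers live behind an index/data pair; the dummy read of
 * RADEON_PCIE_INDEX after writing it is a posting read that makes
 * sure the new index has landed before the data register is touched.
 */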
uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
{
	uint32_t r;

	WREG8(RADEON_PCIE_INDEX, reg & 0xff);
	(void)RREG32(RADEON_PCIE_INDEX);
	r = RREG32(RADEON_PCIE_DATA);
	return r;
}

void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
	WREG8(RADEON_PCIE_INDEX, reg & 0xff);
	(void)RREG32(RADEON_PCIE_INDEX);
	WREG32(RADEON_PCIE_DATA, v);
	(void)RREG32(RADEON_PCIE_DATA);
}

/*
 * PCIE Lanes
 */

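/* Reconfigure the PCIE link width.  While the link retrains, the
 * control register is assumed to read back as all ones, so the final
 * loop spins until a sane value reappears.
 */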
void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes)
{
	uint32_t link_width_cntl, mask;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* FIXME: wait for idle */

	switch (lanes) {
	case 0:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X0;
		break;
	case 1:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X1;
		break;
	case 2:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X2;
		break;
	case 4:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X4;
		break;
	case 8:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X8;
		break;
	case 12:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X12;
		break;
	case 16:
	default:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X16;
		break;
	}

	link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);

	if ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) ==
	    (mask << RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT))
		return;

	link_width_cntl &= ~(RADEON_PCIE_LC_LINK_WIDTH_MASK |
			     RADEON_PCIE_LC_RECONFIG_NOW |
			     RADEON_PCIE_LC_RECONFIG_LATER |
			     RADEON_PCIE_LC_SHORT_RECONFIG_EN);
	link_width_cntl |= mask;
	WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
	WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, (link_width_cntl |
						     RADEON_PCIE_LC_RECONFIG_NOW));

	/* wait for lane set to complete */
	link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
	while (link_width_cntl == 0xffffffff)
		link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
}


/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)
static int rv370_debugfs_pcie_gart_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t tmp;

	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
	seq_printf(m, "PCIE_TX_GART_CNTL 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_BASE);
	seq_printf(m, "PCIE_TX_GART_BASE 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_LO);
	seq_printf(m, "PCIE_TX_GART_START_LO 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_HI);
	seq_printf(m, "PCIE_TX_GART_START_HI 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_LO);
	seq_printf(m, "PCIE_TX_GART_END_LO 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_HI);
	seq_printf(m, "PCIE_TX_GART_END_HI 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_ERROR);
	seq_printf(m, "PCIE_TX_GART_ERROR 0x%08x\n", tmp);
	return 0;
}

static struct drm_info_list rv370_pcie_gart_info_list[] = {
	{"rv370_pcie_gart_info", rv370_debugfs_pcie_gart_info, 0, NULL},
};
#endif

int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, rv370_pcie_gart_info_list, 1);
#else
	return 0;
#endif
}


/*
 * CS functions
 */
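/* CS checker state.  One r300_cs_track instance lives on the stack
 * per parse and accumulates the color/z buffer bindings, vertex
 * arrays and texture setup seen in the stream, so that each draw
 * packet can be validated against the sizes of the bound objects.
 */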
struct r300_cs_track_cb {
	struct radeon_object	*robj;
	unsigned		pitch;
	unsigned		cpp;
	unsigned		offset;
};

struct r300_cs_track_array {
	struct radeon_object	*robj;
	unsigned		esize;
};

struct r300_cs_track_texture {
	struct radeon_object	*robj;
	unsigned		pitch;
	unsigned		width;
	unsigned		height;
	unsigned		num_levels;
	unsigned		cpp;
	unsigned		tex_coord_type;
	unsigned		txdepth;
	unsigned		width_11;
	unsigned		height_11;
	bool			use_pitch;
	bool			enabled;
	bool			roundup_w;
	bool			roundup_h;
};

struct r300_cs_track {
	unsigned			num_cb;
	unsigned			maxy;
	unsigned			vtx_size;
	unsigned			vap_vf_cntl;
	unsigned			immd_dwords;
	unsigned			num_arrays;
	unsigned			max_indx;
	struct r300_cs_track_array	arrays[11];
	struct r300_cs_track_cb		cb[4];
	struct r300_cs_track_cb		zb;
	struct r300_cs_track_texture	textures[16];
	bool				z_enabled;
};

static inline void r300_cs_track_texture_print(struct r300_cs_track_texture *t)
{
	DRM_ERROR("pitch                      %d\n", t->pitch);
	DRM_ERROR("width                      %d\n", t->width);
	DRM_ERROR("height                     %d\n", t->height);
	DRM_ERROR("num levels                 %d\n", t->num_levels);
	DRM_ERROR("depth                      %d\n", t->txdepth);
	DRM_ERROR("bpp                        %d\n", t->cpp);
	DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
	DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
	DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
}

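/* For every enabled texture unit, sum the footprint of the whole mip
 * chain (per-level width * height, times bytes per pixel, times the
 * depth or cube-face factor implied by tex_coord_type) and check that
 * the bound object is large enough.
 */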
static inline int r300_cs_track_texture_check(struct radeon_device *rdev,
					      struct r300_cs_track *track)
{
	struct radeon_object *robj;
	unsigned long size;
	unsigned u, i, w, h;

	for (u = 0; u < 16; u++) {
		if (!track->textures[u].enabled)
			continue;
		robj = track->textures[u].robj;
		if (robj == NULL) {
			DRM_ERROR("No texture bound to unit %u\n", u);
			return -EINVAL;
		}
		size = 0;
		for (i = 0; i <= track->textures[u].num_levels; i++) {
			if (track->textures[u].use_pitch) {
				w = track->textures[u].pitch / (1 << i);
			} else {
				w = track->textures[u].width / (1 << i);
				if (rdev->family >= CHIP_RV515)
					w |= track->textures[u].width_11;
				if (track->textures[u].roundup_w)
					w = roundup_pow_of_two(w);
			}
			h = track->textures[u].height / (1 << i);
			if (rdev->family >= CHIP_RV515)
				h |= track->textures[u].height_11;
			if (track->textures[u].roundup_h)
				h = roundup_pow_of_two(h);
			size += w * h;
		}
		size *= track->textures[u].cpp;
		switch (track->textures[u].tex_coord_type) {
		case 0:
			break;
		case 1:
			size *= (1 << track->textures[u].txdepth);
			break;
		case 2:
			size *= 6;
			break;
		default:
			DRM_ERROR("Invalid texture coordinate type %u for unit "
				  "%u\n", track->textures[u].tex_coord_type, u);
			return -EINVAL;
		}
		if (size > radeon_object_size(robj)) {
			DRM_ERROR("Texture of unit %u needs %lu bytes but is "
				  "%lu\n", u, size, radeon_object_size(robj));
			r300_cs_track_texture_print(&track->textures[u]);
			return -EINVAL;
		}
	}
	return 0;
}

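/* Validate the full draw state accumulated so far: every enabled
 * color buffer, the z buffer when depth is enabled, and the vertex
 * arrays or immediate data sized according to PRIM_WALK, before
 * delegating to the texture check above.
 */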
int r300_cs_track_check(struct radeon_device *rdev, struct r300_cs_track *track)
{
	unsigned i;
	unsigned long size;
	unsigned prim_walk;
	unsigned nverts;

	for (i = 0; i < track->num_cb; i++) {
		if (track->cb[i].robj == NULL) {
			DRM_ERROR("[drm] No buffer for color buffer %d!\n", i);
			return -EINVAL;
		}
		size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
		size += track->cb[i].offset;
		if (size > radeon_object_size(track->cb[i].robj)) {
			DRM_ERROR("[drm] Buffer too small for color buffer %d "
				  "(need %lu have %lu)!\n", i, size,
				  radeon_object_size(track->cb[i].robj));
			DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
				  i, track->cb[i].pitch, track->cb[i].cpp,
				  track->cb[i].offset, track->maxy);
			return -EINVAL;
		}
	}
	if (track->z_enabled) {
		if (track->zb.robj == NULL) {
			DRM_ERROR("[drm] No buffer for z buffer!\n");
			return -EINVAL;
		}
		size = track->zb.pitch * track->zb.cpp * track->maxy;
		size += track->zb.offset;
		if (size > radeon_object_size(track->zb.robj)) {
			DRM_ERROR("[drm] Buffer too small for z buffer "
				  "(need %lu have %lu)!\n", size,
				  radeon_object_size(track->zb.robj));
			return -EINVAL;
		}
	}
	prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
	nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
	switch (prim_walk) {
	case 1:
		for (i = 0; i < track->num_arrays; i++) {
			size = track->arrays[i].esize * track->max_indx * 4;
			if (track->arrays[i].robj == NULL) {
				DRM_ERROR("(PW %u) Vertex array %u no buffer "
					  "bound\n", prim_walk, i);
				return -EINVAL;
			}
			if (size > radeon_object_size(track->arrays[i].robj)) {
				DRM_ERROR("(PW %u) Vertex array %u needs %lu dwords "
					  "have %lu dwords\n", prim_walk, i,
					  size >> 2,
					  radeon_object_size(track->arrays[i].robj) >> 2);
				DRM_ERROR("Max indices %u\n", track->max_indx);
				return -EINVAL;
			}
		}
		break;
	case 2:
		for (i = 0; i < track->num_arrays; i++) {
			size = track->arrays[i].esize * (nverts - 1) * 4;
			if (track->arrays[i].robj == NULL) {
				DRM_ERROR("(PW %u) Vertex array %u no buffer "
					  "bound\n", prim_walk, i);
				return -EINVAL;
			}
			if (size > radeon_object_size(track->arrays[i].robj)) {
				DRM_ERROR("(PW %u) Vertex array %u needs %lu dwords "
					  "have %lu dwords\n", prim_walk, i, size >> 2,
					  radeon_object_size(track->arrays[i].robj) >> 2);
				return -EINVAL;
			}
		}
		break;
	case 3:
		size = track->vtx_size * nverts;
		if (size != track->immd_dwords) {
			DRM_ERROR("IMMD draw %u dwords but needs %lu dwords\n",
				  track->immd_dwords, size);
			DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
				  nverts, track->vtx_size);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
			  prim_walk);
		return -EINVAL;
	}
	return r300_cs_track_texture_check(rdev, track);
}

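/* Reset tracking to worst-case defaults so that a draw emitted before
 * the state is fully programmed is judged against the most pessimistic
 * buffer requirements and rejected unless the bound objects really are
 * big enough.
 */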
static inline void r300_cs_track_clear(struct r300_cs_track *track)
{
	unsigned i;

	track->num_cb = 4;
	track->maxy = 4096;
	for (i = 0; i < track->num_cb; i++) {
		track->cb[i].robj = NULL;
		track->cb[i].pitch = 8192;
		track->cb[i].cpp = 16;
		track->cb[i].offset = 0;
	}
	track->z_enabled = true;
	track->zb.robj = NULL;
	track->zb.pitch = 8192;
	track->zb.cpp = 4;
	track->zb.offset = 0;
	track->vtx_size = 0x7F;
	track->immd_dwords = 0xFFFFFFFFUL;
	track->num_arrays = 11;
	track->max_indx = 0x00FFFFFFUL;
	for (i = 0; i < track->num_arrays; i++) {
		track->arrays[i].robj = NULL;
		track->arrays[i].esize = 0x7F;
	}
	for (i = 0; i < 16; i++) {
		track->textures[i].pitch = 16536;
		track->textures[i].width = 16536;
		track->textures[i].height = 16536;
		track->textures[i].width_11 = 1 << 11;
		track->textures[i].height_11 = 1 << 11;
		track->textures[i].num_levels = 12;
		track->textures[i].txdepth = 16;
		track->textures[i].cpp = 64;
		track->textures[i].tex_coord_type = 1;
		track->textures[i].robj = NULL;
		/* CS IB emission code makes sure texture units are disabled */
		track->textures[i].enabled = false;
		track->textures[i].roundup_w = true;
		track->textures[i].roundup_h = true;
	}
}

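/* One bit per register dword (32 registers per array entry).  A set
 * bit appears to mark a register user space may write freely; a
 * cleared bit routes the write through r300_packet0_check() below for
 * relocation patching or state tracking, and registers that function
 * does not recognize are rejected as forbidden.
 */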
static const unsigned r300_reg_safe_bm[159] = {
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0x17FF1FFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFF30FFBF,
	0xFFFFFFF8, 0xC3E6FFFF, 0xFFFFF6DF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF03F,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFEFCE, 0xF00EBFFF, 0x007C0000,
	0xF0000078, 0xFF000009, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFF7FF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFC78, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
	0x38FF8F50, 0xFFF88082, 0xF000000C, 0xFAE009FF,
	0x0000FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
	0x00000000, 0x0000C100, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x0003FC01, 0xFFFFFFF8, 0xFE800B19,
};

static int r300_packet0_check(struct radeon_cs_parser *p,
		struct radeon_cs_packet *pkt,
		unsigned idx, unsigned reg)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_reloc *reloc;
	struct r300_cs_track *track;
	volatile uint32_t *ib;
	uint32_t tmp;
	unsigned i;
	int r;

	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	track = (struct r300_cs_track *)p->track;
	switch (reg) {
	case AVIVO_D1MODE_VLINE_START_END:
	case RADEON_CRTC_GUI_TRIG_VLINE:
		r = r100_cs_packet_parse_vline(p);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		break;
	case RADEON_DST_PITCH_OFFSET:
	case RADEON_SRC_PITCH_OFFSET:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		tmp = ib_chunk->kdata[idx] & 0x003fffff;
		tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
		ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
		break;
	case R300_RB3D_COLOROFFSET0:
	case R300_RB3D_COLOROFFSET1:
	case R300_RB3D_COLOROFFSET2:
	case R300_RB3D_COLOROFFSET3:
		i = (reg - R300_RB3D_COLOROFFSET0) >> 2;
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		track->cb[i].robj = reloc->robj;
		track->cb[i].offset = ib_chunk->kdata[idx];
		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
		break;
	case R300_ZB_DEPTHOFFSET:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		track->zb.robj = reloc->robj;
		track->zb.offset = ib_chunk->kdata[idx];
		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
		break;
	case R300_TX_OFFSET_0:
	case R300_TX_OFFSET_0+4:
	case R300_TX_OFFSET_0+8:
	case R300_TX_OFFSET_0+12:
	case R300_TX_OFFSET_0+16:
	case R300_TX_OFFSET_0+20:
	case R300_TX_OFFSET_0+24:
	case R300_TX_OFFSET_0+28:
	case R300_TX_OFFSET_0+32:
	case R300_TX_OFFSET_0+36:
	case R300_TX_OFFSET_0+40:
	case R300_TX_OFFSET_0+44:
	case R300_TX_OFFSET_0+48:
	case R300_TX_OFFSET_0+52:
	case R300_TX_OFFSET_0+56:
	case R300_TX_OFFSET_0+60:
		i = (reg - R300_TX_OFFSET_0) >> 2;
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
		track->textures[i].robj = reloc->robj;
		break;
	/* Tracked registers */
	case 0x2084:
		/* VAP_VF_CNTL */
		track->vap_vf_cntl = ib_chunk->kdata[idx];
		break;
	case 0x20B4:
		/* VAP_VTX_SIZE */
		track->vtx_size = ib_chunk->kdata[idx] & 0x7F;
		break;
	case 0x2134:
		/* VAP_VF_MAX_VTX_INDX */
		track->max_indx = ib_chunk->kdata[idx] & 0x00FFFFFFUL;
		break;
	case 0x43E4:
		/* SC_SCISSOR1 */
		track->maxy = ((ib_chunk->kdata[idx] >> 13) & 0x1FFF) + 1;
		if (p->rdev->family < CHIP_RV515) {
			track->maxy -= 1440;
		}
		break;
	case 0x4E00:
		/* RB3D_CCTL */
		track->num_cb = ((ib_chunk->kdata[idx] >> 5) & 0x3) + 1;
		break;
	case 0x4E38:
	case 0x4E3C:
	case 0x4E40:
	case 0x4E44:
		/* RB3D_COLORPITCH0 */
		/* RB3D_COLORPITCH1 */
		/* RB3D_COLORPITCH2 */
		/* RB3D_COLORPITCH3 */
		i = (reg - 0x4E38) >> 2;
		track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
		switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
		case 9:
		case 11:
		case 12:
			track->cb[i].cpp = 1;
			break;
		case 3:
		case 4:
		case 13:
		case 15:
			track->cb[i].cpp = 2;
			break;
		case 6:
			track->cb[i].cpp = 4;
			break;
		case 10:
			track->cb[i].cpp = 8;
			break;
		case 7:
			track->cb[i].cpp = 16;
			break;
		default:
			DRM_ERROR("Invalid color buffer format (%d)!\n",
				  ((ib_chunk->kdata[idx] >> 21) & 0xF));
			return -EINVAL;
		}
		break;
	case 0x4F00:
		/* ZB_CNTL */
		if (ib_chunk->kdata[idx] & 2) {
			track->z_enabled = true;
		} else {
			track->z_enabled = false;
		}
		break;
	case 0x4F10:
		/* ZB_FORMAT */
		switch ((ib_chunk->kdata[idx] & 0xF)) {
		case 0:
		case 1:
			track->zb.cpp = 2;
			break;
		case 2:
			track->zb.cpp = 4;
			break;
		default:
			DRM_ERROR("Invalid z buffer format (%d)!\n",
				  (ib_chunk->kdata[idx] & 0xF));
			return -EINVAL;
		}
		break;
	case 0x4F24:
		/* ZB_DEPTHPITCH */
		track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
		break;
	case 0x4104:
		/* TX_ENABLE */
		for (i = 0; i < 16; i++) {
			bool enabled;

			enabled = !!(ib_chunk->kdata[idx] & (1 << i));
			track->textures[i].enabled = enabled;
		}
		break;
	case 0x44C0:
	case 0x44C4:
	case 0x44C8:
	case 0x44CC:
	case 0x44D0:
	case 0x44D4:
	case 0x44D8:
	case 0x44DC:
	case 0x44E0:
	case 0x44E4:
	case 0x44E8:
	case 0x44EC:
	case 0x44F0:
	case 0x44F4:
	case 0x44F8:
	case 0x44FC:
		/* TX_FORMAT1_[0-15] */
		i = (reg - 0x44C0) >> 2;
		tmp = (ib_chunk->kdata[idx] >> 25) & 0x3;
		track->textures[i].tex_coord_type = tmp;
		switch ((ib_chunk->kdata[idx] & 0x1F)) {
		case 0:
		case 2:
		case 5:
		case 18:
		case 20:
		case 21:
			track->textures[i].cpp = 1;
			break;
		case 1:
		case 3:
		case 6:
		case 7:
		case 10:
		case 11:
		case 19:
		case 22:
		case 24:
			track->textures[i].cpp = 2;
			break;
		case 4:
		case 8:
		case 9:
		case 12:
		case 13:
		case 23:
		case 25:
		case 27:
		case 30:
			track->textures[i].cpp = 4;
			break;
		case 14:
		case 26:
		case 28:
			track->textures[i].cpp = 8;
			break;
		case 29:
			track->textures[i].cpp = 16;
			break;
		default:
			DRM_ERROR("Invalid texture format %u\n",
				  (ib_chunk->kdata[idx] & 0x1F));
			return -EINVAL;
		}
		break;
	case 0x4400:
	case 0x4404:
	case 0x4408:
	case 0x440C:
	case 0x4410:
	case 0x4414:
	case 0x4418:
	case 0x441C:
	case 0x4420:
	case 0x4424:
	case 0x4428:
	case 0x442C:
	case 0x4430:
	case 0x4434:
	case 0x4438:
	case 0x443C:
		/* TX_FILTER0_[0-15] */
		i = (reg - 0x4400) >> 2;
		tmp = ib_chunk->kdata[idx] & 0x7;
		if (tmp == 2 || tmp == 4 || tmp == 6) {
			track->textures[i].roundup_w = false;
		}
		tmp = (ib_chunk->kdata[idx] >> 3) & 0x7;
		if (tmp == 2 || tmp == 4 || tmp == 6) {
			track->textures[i].roundup_h = false;
		}
		break;
	case 0x4500:
	case 0x4504:
	case 0x4508:
	case 0x450C:
	case 0x4510:
	case 0x4514:
	case 0x4518:
	case 0x451C:
	case 0x4520:
	case 0x4524:
	case 0x4528:
	case 0x452C:
	case 0x4530:
	case 0x4534:
	case 0x4538:
	case 0x453C:
		/* TX_FORMAT2_[0-15] */
		i = (reg - 0x4500) >> 2;
		tmp = ib_chunk->kdata[idx] & 0x3FFF;
		track->textures[i].pitch = tmp + 1;
		if (p->rdev->family >= CHIP_RV515) {
			tmp = ((ib_chunk->kdata[idx] >> 15) & 1) << 11;
			track->textures[i].width_11 = tmp;
			tmp = ((ib_chunk->kdata[idx] >> 16) & 1) << 11;
			track->textures[i].height_11 = tmp;
		}
		break;
	case 0x4480:
	case 0x4484:
	case 0x4488:
	case 0x448C:
	case 0x4490:
	case 0x4494:
	case 0x4498:
	case 0x449C:
	case 0x44A0:
	case 0x44A4:
	case 0x44A8:
	case 0x44AC:
	case 0x44B0:
	case 0x44B4:
	case 0x44B8:
	case 0x44BC:
		/* TX_FORMAT0_[0-15] */
		i = (reg - 0x4480) >> 2;
		tmp = ib_chunk->kdata[idx] & 0x7FF;
		track->textures[i].width = tmp + 1;
		tmp = (ib_chunk->kdata[idx] >> 11) & 0x7FF;
		track->textures[i].height = tmp + 1;
		tmp = (ib_chunk->kdata[idx] >> 26) & 0xF;
		track->textures[i].num_levels = tmp;
		tmp = ib_chunk->kdata[idx] & (1 << 31);
		track->textures[i].use_pitch = !!tmp;
		tmp = (ib_chunk->kdata[idx] >> 22) & 0xF;
		track->textures[i].txdepth = tmp;
		break;
	default:
		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
		       reg, idx);
		return -EINVAL;
	}
	return 0;
}

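/* Validate PACKET3 commands: patch relocations into the vertex and
 * index buffer addresses and run r300_cs_track_check() before every
 * draw packet.
 */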
static int r300_packet3_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_reloc *reloc;
	struct r300_cs_track *track;
	volatile uint32_t *ib;
	unsigned idx;
	unsigned i, c;
	int r;

	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx + 1;
	track = (struct r300_cs_track *)p->track;
	switch (pkt->opcode) {
	case PACKET3_3D_LOAD_VBPNTR:
		c = ib_chunk->kdata[idx++] & 0x1F;
		track->num_arrays = c;
		for (i = 0; i < (c - 1); i += 2, idx += 3) {
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
			track->arrays[i + 0].robj = reloc->robj;
			track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
			track->arrays[i + 0].esize &= 0x7F;
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
			track->arrays[i + 1].robj = reloc->robj;
			track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24;
			track->arrays[i + 1].esize &= 0x7F;
		}
		if (c & 1) {
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
			track->arrays[i + 0].robj = reloc->robj;
			track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
			track->arrays[i + 0].esize &= 0x7F;
		}
		break;
	case PACKET3_INDX_BUFFER:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
		if (r) {
			return r;
		}
		break;
	/* Draw packets */
	case PACKET3_3D_DRAW_IMMD:
		/* Number of dwords is vtx_size * (num_vertices - 1);
		 * PRIM_WALK must be equal to 3, vertex data is embedded
		 * in the cmd stream */
		if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vap_vf_cntl = ib_chunk->kdata[idx+1];
		track->immd_dwords = pkt->count - 1;
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_IMMD_2:
		/* Number of dwords is vtx_size * (num_vertices - 1);
		 * PRIM_WALK must be equal to 3, vertex data is embedded
		 * in the cmd stream */
		if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vap_vf_cntl = ib_chunk->kdata[idx];
		track->immd_dwords = pkt->count;
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_VBUF:
		track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_VBUF_2:
		track->vap_vf_cntl = ib_chunk->kdata[idx];
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_INDX:
		track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_INDX_2:
		track->vap_vf_cntl = ib_chunk->kdata[idx];
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_NOP:
		break;
	default:
		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}

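/* Top level of the CS checker: walk the IB packet by packet,
 * dispatching PACKET0 writes through the safe-register bitmap and
 * PACKET3 commands through r300_packet3_check().
 */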
int r300_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	struct r300_cs_track track;
	int r;

	r300_cs_track_clear(&track);
	p->track = &track;
	do {
		r = r100_cs_packet_parse(p, &pkt, p->idx);
		if (r) {
			return r;
		}
		p->idx += pkt.count + 2;
		switch (pkt.type) {
		case PACKET_TYPE0:
			r = r100_cs_parse_packet0(p, &pkt,
						  p->rdev->config.r300.reg_safe_bm,
						  p->rdev->config.r300.reg_safe_bm_size,
						  &r300_packet0_check);
			break;
		case PACKET_TYPE2:
			break;
		case PACKET_TYPE3:
			r = r300_packet3_check(p, &pkt);
			break;
		default:
			DRM_ERROR("Unknown packet type %d!\n", pkt.type);
			return -EINVAL;
		}
		if (r) {
			return r;
		}
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
	return 0;
}

int r300_init(struct radeon_device *rdev)
{
	rdev->config.r300.reg_safe_bm = r300_reg_safe_bm;
	rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(r300_reg_safe_bm);
	return 0;
}