ni.c revision a49a50dad48586d42ebac1a6730c3a3cd5603421
/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_drm.h"
#include "nid.h"
#include "atom.h"
#include "ni_reg.h"
#include "cayman_blit_shaders.h"

extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
extern void evergreen_mc_program(struct radeon_device *rdev);
extern void evergreen_irq_suspend(struct radeon_device *rdev);
extern int evergreen_mc_init(struct radeon_device *rdev);

#define EVERGREEN_PFP_UCODE_SIZE 1120
#define EVERGREEN_PM4_UCODE_SIZE 1376
#define EVERGREEN_RLC_UCODE_SIZE 768
#define BTC_MC_UCODE_SIZE 6024

#define CAYMAN_PFP_UCODE_SIZE 2176
#define CAYMAN_PM4_UCODE_SIZE 2176
#define CAYMAN_RLC_UCODE_SIZE 1024
#define CAYMAN_MC_UCODE_SIZE 6037

/* Firmware Names */
MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
MODULE_FIRMWARE("radeon/BARTS_me.bin");
MODULE_FIRMWARE("radeon/BARTS_mc.bin");
MODULE_FIRMWARE("radeon/BTC_rlc.bin");
MODULE_FIRMWARE("radeon/TURKS_pfp.bin");
MODULE_FIRMWARE("radeon/TURKS_me.bin");
MODULE_FIRMWARE("radeon/TURKS_mc.bin");
MODULE_FIRMWARE("radeon/CAICOS_pfp.bin");
MODULE_FIRMWARE("radeon/CAICOS_me.bin");
MODULE_FIRMWARE("radeon/CAICOS_mc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_pfp.bin");
MODULE_FIRMWARE("radeon/CAYMAN_me.bin");
MODULE_FIRMWARE("radeon/CAYMAN_mc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_rlc.bin");

#define BTC_IO_MC_REGS_SIZE 29

static const u32 barts_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00946a00}
};

static const u32 turks_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00936a00}
};

static const u32 caicos_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00916a00}
};

static const u32 cayman_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00976b00}
};
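
/*
 * ni_mc_load_microcode - load the MC (memory controller) microcode.
 *
 * Selects the per-ASIC IO MC register table and ucode size, and, if the
 * MC sequencer is not already running on a GDDR5 board, halts the engine,
 * programs the IO debug registers, streams the big-endian ucode words
 * into MC_SEQ_SUP_PGM, restarts the engine and polls until memory
 * training completes.  Returns 0 on success, -EINVAL if no MC firmware
 * has been loaded.
 */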
int ni_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 mem_type, running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BARTS:
		io_mc_regs = (u32 *)&barts_io_mc_regs;
		ucode_size = BTC_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	case CHIP_TURKS:
		io_mc_regs = (u32 *)&turks_io_mc_regs;
		ucode_size = BTC_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	case CHIP_CAICOS:
	default:
		io_mc_regs = (u32 *)&caicos_io_mc_regs;
		ucode_size = BTC_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	case CHIP_CAYMAN:
		io_mc_regs = (u32 *)&cayman_io_mc_regs;
		ucode_size = CAYMAN_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	}

	mem_type = (RREG32(MC_SEQ_MISC0) & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT;
	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if ((mem_type == MC_SEQ_MISC0_GDDR5_VALUE) && (running == 0)) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		while (!(RREG32(MC_IO_PAD_CNTL_D0) & MEM_FALL_OUT_CMD))
			udelay(10);

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}
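
/*
 * ni_init_microcode - fetch PFP, ME, RLC and MC firmware from userspace.
 *
 * Registers a temporary "radeon_cp" platform device, requests the
 * per-chip firmware images via request_firmware(), and validates each
 * image against the expected size for the ASIC family.  On any failure
 * all firmware references are released and an error is returned.
 */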
int ni_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *chip_name;
	const char *rlc_chip_name;
	size_t pfp_req_size, me_req_size, rlc_req_size, mc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	switch (rdev->family) {
	case CHIP_BARTS:
		chip_name = "BARTS";
		rlc_chip_name = "BTC";
		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
		mc_req_size = BTC_MC_UCODE_SIZE * 4;
		break;
	case CHIP_TURKS:
		chip_name = "TURKS";
		rlc_chip_name = "BTC";
		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
		mc_req_size = BTC_MC_UCODE_SIZE * 4;
		break;
	case CHIP_CAICOS:
		chip_name = "CAICOS";
		rlc_chip_name = "BTC";
		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
		mc_req_size = BTC_MC_UCODE_SIZE * 4;
		break;
	case CHIP_CAYMAN:
		chip_name = "CAYMAN";
		rlc_chip_name = "CAYMAN";
		pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
		me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
		rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4;
		mc_req_size = CAYMAN_MC_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "ni_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "ni_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "ni_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
	err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->mc_fw->size != mc_req_size) {
		printk(KERN_ERR
		       "ni_mc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mc_fw->size, fw_name);
		err = -EINVAL;
	}
out:
	platform_device_unregister(pdev);

	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "ni_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
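
/*
 * cayman_get_tile_pipe_to_backend_map - build the tile pipe to render
 * backend mapping.
 *
 * Clamps the requested pipe/backend/shader-engine counts to the limits
 * in rdev->config.cayman, recomputes the enabled-backend mask if it does
 * not match the requested backend count, picks a (possibly swizzled)
 * pipe ordering, and packs one 4-bit backend index per tile pipe into
 * the returned 32-bit map.
 */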
static u32 cayman_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
					       u32 num_tile_pipes,
					       u32 num_backends_per_asic,
					       u32 *backend_disable_mask_per_asic,
					       u32 num_shader_engines)
{
	u32 backend_map = 0;
	u32 enabled_backends_mask = 0;
	u32 enabled_backends_count = 0;
	u32 num_backends_per_se;
	u32 cur_pipe;
	u32 swizzle_pipe[CAYMAN_MAX_PIPES];
	u32 cur_backend = 0;
	u32 i;
	bool force_no_swizzle;

	/* force legal values */
	if (num_tile_pipes < 1)
		num_tile_pipes = 1;
	if (num_tile_pipes > rdev->config.cayman.max_tile_pipes)
		num_tile_pipes = rdev->config.cayman.max_tile_pipes;
	if (num_shader_engines < 1)
		num_shader_engines = 1;
	if (num_shader_engines > rdev->config.cayman.max_shader_engines)
		num_shader_engines = rdev->config.cayman.max_shader_engines;
	if (num_backends_per_asic < num_shader_engines)
		num_backends_per_asic = num_shader_engines;
	if (num_backends_per_asic > (rdev->config.cayman.max_backends_per_se * num_shader_engines))
		num_backends_per_asic = rdev->config.cayman.max_backends_per_se * num_shader_engines;

	/* make sure we have the same number of backends per se */
	num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines);
	/* set up the number of backends per se */
	num_backends_per_se = num_backends_per_asic / num_shader_engines;
	if (num_backends_per_se > rdev->config.cayman.max_backends_per_se) {
		num_backends_per_se = rdev->config.cayman.max_backends_per_se;
		num_backends_per_asic = num_backends_per_se * num_shader_engines;
	}

	/* create enable mask and count for enabled backends */
	for (i = 0; i < CAYMAN_MAX_BACKENDS; ++i) {
		if (((*backend_disable_mask_per_asic >> i) & 1) == 0) {
			enabled_backends_mask |= (1 << i);
			++enabled_backends_count;
		}
		if (enabled_backends_count == num_backends_per_asic)
			break;
	}

	/* force the backends mask to match the current number of backends */
	if (enabled_backends_count != num_backends_per_asic) {
		u32 this_backend_enabled;
		u32 shader_engine;
		u32 backend_per_se;

		enabled_backends_mask = 0;
		enabled_backends_count = 0;
		*backend_disable_mask_per_asic = CAYMAN_MAX_BACKENDS_MASK;
		for (i = 0; i < CAYMAN_MAX_BACKENDS; ++i) {
			/* calc the current se */
			shader_engine = i / rdev->config.cayman.max_backends_per_se;
			/* calc the backend per se */
			backend_per_se = i % rdev->config.cayman.max_backends_per_se;
			/* default to not enabled */
			this_backend_enabled = 0;
			if ((shader_engine < num_shader_engines) &&
			    (backend_per_se < num_backends_per_se))
				this_backend_enabled = 1;
			if (this_backend_enabled) {
				enabled_backends_mask |= (1 << i);
				*backend_disable_mask_per_asic &= ~(1 << i);
				++enabled_backends_count;
			}
		}
	}

	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * CAYMAN_MAX_PIPES);
	switch (rdev->family) {
	case CHIP_CAYMAN:
		force_no_swizzle = true;
		break;
	default:
		force_no_swizzle = false;
		break;
	}
	if (force_no_swizzle) {
		bool last_backend_enabled = false;

		force_no_swizzle = false;
		for (i = 0; i < CAYMAN_MAX_BACKENDS; ++i) {
			if (((enabled_backends_mask >> i) & 1) == 1) {
				if (last_backend_enabled)
					force_no_swizzle = true;
				last_backend_enabled = true;
			} else
				last_backend_enabled = false;
		}
	}

	switch (num_tile_pipes) {
	case 1:
	case 3:
	case 5:
	case 7:
		DRM_ERROR("odd number of pipes!\n");
		break;
	case 2:
		swizzle_pipe[0] = 0;
		swizzle_pipe[1] = 1;
		break;
	case 4:
		if (force_no_swizzle) {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 1;
			swizzle_pipe[2] = 2;
			swizzle_pipe[3] = 3;
		} else {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 2;
			swizzle_pipe[2] = 1;
			swizzle_pipe[3] = 3;
		}
		break;
	case 6:
		if (force_no_swizzle) {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 1;
			swizzle_pipe[2] = 2;
			swizzle_pipe[3] = 3;
			swizzle_pipe[4] = 4;
			swizzle_pipe[5] = 5;
		} else {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 2;
			swizzle_pipe[2] = 4;
			swizzle_pipe[3] = 1;
			swizzle_pipe[4] = 3;
			swizzle_pipe[5] = 5;
		}
		break;
	case 8:
		if (force_no_swizzle) {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 1;
			swizzle_pipe[2] = 2;
			swizzle_pipe[3] = 3;
			swizzle_pipe[4] = 4;
			swizzle_pipe[5] = 5;
			swizzle_pipe[6] = 6;
			swizzle_pipe[7] = 7;
		} else {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 2;
			swizzle_pipe[2] = 4;
			swizzle_pipe[3] = 6;
			swizzle_pipe[4] = 1;
			swizzle_pipe[5] = 3;
			swizzle_pipe[6] = 5;
			swizzle_pipe[7] = 7;
		}
		break;
	}

	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
		while (((1 << cur_backend) & enabled_backends_mask) == 0)
			cur_backend = (cur_backend + 1) % CAYMAN_MAX_BACKENDS;

		backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4)));

		cur_backend = (cur_backend + 1) % CAYMAN_MAX_BACKENDS;
	}

	return backend_map;
}
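
/*
 * cayman_program_channel_remap - program the memory channel remapping.
 *
 * Reads the channel count from MC_SHARED_CHMAP and writes the TCP
 * steering and MC_SHARED_CHREMAP values; only the default mapping is
 * used for all channel counts on this ASIC.
 */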
static void cayman_program_channel_remap(struct radeon_device *rdev)
{
	u32 tcp_chan_steer_lo, tcp_chan_steer_hi, mc_shared_chremap, tmp;

	tmp = RREG32(MC_SHARED_CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	case 1:
	case 2:
	case 3:
	default:
		/* default mapping */
		mc_shared_chremap = 0x00fac688;
		break;
	}

	switch (rdev->family) {
	case CHIP_CAYMAN:
	default:
		/* tcp_chan_steer_lo = 0x54763210 */
		tcp_chan_steer_lo = 0x76543210;
		tcp_chan_steer_hi = 0x0000ba98;
		break;
	}

	WREG32(TCP_CHAN_STEER_LO, tcp_chan_steer_lo);
	WREG32(TCP_CHAN_STEER_HI, tcp_chan_steer_hi);
	WREG32(MC_SHARED_CHREMAP, mc_shared_chremap);
}
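
/*
 * cayman_get_disable_mask_per_asic - widen a per-SE backend disable mask
 * to an ASIC-wide mask by replicating it once per shader engine (only
 * one or two shader engines are supported; anything else returns all
 * backends disabled).
 */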
static u32 cayman_get_disable_mask_per_asic(struct radeon_device *rdev,
					    u32 disable_mask_per_se,
					    u32 max_disable_mask_per_se,
					    u32 num_shader_engines)
{
	u32 disable_field_width_per_se = r600_count_pipe_bits(disable_mask_per_se);
	u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se;

	if (num_shader_engines == 1)
		return disable_mask_per_asic;
	else if (num_shader_engines == 2)
		return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se);
	else
		return 0xffffffff;
}
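
/*
 * cayman_gpu_init - set up the 3D engine.
 *
 * Fills rdev->config.cayman from the hardware strap/config registers,
 * derives gb_addr_config, the backend map and the tiling config dword,
 * and programs the HW defaults for the shader complex, SX, SQ, VGT and
 * HDP blocks.
 */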
static void cayman_gpu_init(struct radeon_device *rdev)
{
	u32 cc_rb_backend_disable = 0;
	u32 cc_gc_shader_pipe_config;
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 gb_backend_map;
	u32 cgts_tcc_disable;
	u32 sx_debug_1;
	u32 smx_dc_ctl0;
	u32 gc_user_shader_pipe_config;
	u32 gc_user_rb_backend_disable;
	u32 cgts_user_tcc_disable;
	u32 cgts_sm_ctrl_reg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	switch (rdev->family) {
	case CHIP_CAYMAN:
	default:
		rdev->config.cayman.max_shader_engines = 2;
		rdev->config.cayman.max_pipes_per_simd = 4;
		rdev->config.cayman.max_tile_pipes = 8;
		rdev->config.cayman.max_simds_per_se = 12;
		rdev->config.cayman.max_backends_per_se = 4;
		rdev->config.cayman.max_texture_channel_caches = 8;
		rdev->config.cayman.max_gprs = 256;
		rdev->config.cayman.max_threads = 256;
		rdev->config.cayman.max_gs_threads = 32;
		rdev->config.cayman.max_stack_entries = 512;
		rdev->config.cayman.sx_num_of_sets = 8;
		rdev->config.cayman.sx_max_export_size = 256;
		rdev->config.cayman.sx_max_export_pos_size = 64;
		rdev->config.cayman.sx_max_export_smx_size = 192;
		rdev->config.cayman.max_hw_contexts = 8;
		rdev->config.cayman.sq_num_cf_insts = 2;

		rdev->config.cayman.sc_prim_fifo_size = 0x100;
		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE);
	cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG);
	cgts_tcc_disable = 0xff000000;
	gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE);
	gc_user_shader_pipe_config = RREG32(GC_USER_SHADER_PIPE_CONFIG);
	cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE);

	rdev->config.cayman.num_shader_engines = rdev->config.cayman.max_shader_engines;
	tmp = ((~gc_user_shader_pipe_config) & INACTIVE_QD_PIPES_MASK) >> INACTIVE_QD_PIPES_SHIFT;
	rdev->config.cayman.num_shader_pipes_per_simd = r600_count_pipe_bits(tmp);
	rdev->config.cayman.num_tile_pipes = rdev->config.cayman.max_tile_pipes;
	tmp = ((~gc_user_shader_pipe_config) & INACTIVE_SIMDS_MASK) >> INACTIVE_SIMDS_SHIFT;
	rdev->config.cayman.num_simds_per_se = r600_count_pipe_bits(tmp);
	tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
	rdev->config.cayman.num_backends_per_se = r600_count_pipe_bits(tmp);
	tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
	rdev->config.cayman.backend_disable_mask_per_asic =
		cayman_get_disable_mask_per_asic(rdev, tmp, CAYMAN_MAX_BACKENDS_PER_SE_MASK,
						 rdev->config.cayman.num_shader_engines);
	rdev->config.cayman.backend_map =
		cayman_get_tile_pipe_to_backend_map(rdev, rdev->config.cayman.num_tile_pipes,
						    rdev->config.cayman.num_backends_per_se *
						    rdev->config.cayman.num_shader_engines,
						    &rdev->config.cayman.backend_disable_mask_per_asic,
						    rdev->config.cayman.num_shader_engines);
	tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT;
	rdev->config.cayman.num_texture_channel_caches = r600_count_pipe_bits(tmp);
	tmp = (mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT;
	rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
	if (rdev->config.cayman.mem_max_burst_length_bytes > 512)
		rdev->config.cayman.mem_max_burst_length_bytes = 512;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cayman.mem_row_size_in_kb > 4)
		rdev->config.cayman.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cayman.shader_engine_tile_size = 32;
	rdev->config.cayman.num_gpus = 1;
	rdev->config.cayman.multi_gpu_tile_size = 64;

	/* gb_addr_config = 0x02011003 */
#if 0
	gb_addr_config = RREG32(GB_ADDR_CONFIG);
#else
	gb_addr_config = 0;
	switch (rdev->config.cayman.num_tile_pipes) {
	case 1:
	default:
		gb_addr_config |= NUM_PIPES(0);
		break;
	case 2:
		gb_addr_config |= NUM_PIPES(1);
		break;
	case 4:
		gb_addr_config |= NUM_PIPES(2);
		break;
	case 8:
		gb_addr_config |= NUM_PIPES(3);
		break;
	}

	tmp = (rdev->config.cayman.mem_max_burst_length_bytes / 256) - 1;
	gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp);
	gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.cayman.num_shader_engines - 1);
	tmp = (rdev->config.cayman.shader_engine_tile_size / 16) - 1;
	gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp);
	switch (rdev->config.cayman.num_gpus) {
	case 1:
	default:
		gb_addr_config |= NUM_GPUS(0);
		break;
	case 2:
		gb_addr_config |= NUM_GPUS(1);
		break;
	case 4:
		gb_addr_config |= NUM_GPUS(2);
		break;
	}
	switch (rdev->config.cayman.multi_gpu_tile_size) {
	case 16:
		gb_addr_config |= MULTI_GPU_TILE_SIZE(0);
		break;
	case 32:
	default:
		gb_addr_config |= MULTI_GPU_TILE_SIZE(1);
		break;
	case 64:
		gb_addr_config |= MULTI_GPU_TILE_SIZE(2);
		break;
	case 128:
		gb_addr_config |= MULTI_GPU_TILE_SIZE(3);
		break;
	}
	switch (rdev->config.cayman.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}
#endif

	tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
	rdev->config.cayman.num_tile_pipes = (1 << tmp);
	tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
	rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
	tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
	rdev->config.cayman.num_shader_engines = tmp + 1;
	tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
	rdev->config.cayman.num_gpus = tmp + 1;
	tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
	rdev->config.cayman.multi_gpu_tile_size = 1 << tmp;
	tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
	rdev->config.cayman.mem_row_size_in_kb = 1 << tmp;

	/* gb_backend_map = 0x76541032; */
#if 0
	gb_backend_map = RREG32(GB_BACKEND_MAP);
#else
	gb_backend_map =
		cayman_get_tile_pipe_to_backend_map(rdev, rdev->config.cayman.num_tile_pipes,
						    rdev->config.cayman.num_backends_per_se *
						    rdev->config.cayman.num_shader_engines,
						    &rdev->config.cayman.backend_disable_mask_per_asic,
						    rdev->config.cayman.num_shader_engines);
#endif
	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cayman.tile_config = 0;
	switch (rdev->config.cayman.num_tile_pipes) {
	case 1:
	default:
		rdev->config.cayman.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cayman.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cayman.tile_config |= (2 << 0);
		break;
	case 8:
		rdev->config.cayman.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cayman.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cayman.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cayman.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	rdev->config.cayman.backend_map = gb_backend_map;
	WREG32(GB_BACKEND_MAP, gb_backend_map);
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);

	cayman_program_channel_remap(rdev);

	/* primary versions */
	WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	WREG32(CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);

	WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
	WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable);

	/* user versions */
	WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);

	WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable);
	WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);

	/* reprogram the shader complex */
	cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG);
	for (i = 0; i < 16; i++)
		WREG32(CGTS_SM_CTRL_REG, OVERRIDE);
	WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	sx_debug_1 = RREG32(SX_DEBUG_1);
	sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
	WREG32(SX_DEBUG_1, sx_debug_1);

	smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
	smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff);
	smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets);
	WREG32(SMX_DC_CTL0, smx_dc_ctl0);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE);

	/* need to be explicitly zero-ed */
	WREG32(VGT_OFFCHIP_LDS_BASE, 0);
	WREG32(SQ_LSTMP_RING_BASE, 0);
	WREG32(SQ_HSTMP_RING_BASE, 0);
	WREG32(SQ_ESTMP_RING_BASE, 0);
	WREG32(SQ_GSTMP_RING_BASE, 0);
	WREG32(SQ_VSTMP_RING_BASE, 0);
	WREG32(SQ_PSTMP_RING_BASE, 0);

	WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO);

	WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) |
					POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) |
					SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1)));

	WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) |
				  FETCH_FIFO_HIWATER(0x4) |
				  DONE_FIFO_HIWATER(0xe0) |
				  ALU_UPDATE_FIFO_HIWATER(0x8)));

	WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4));
	WREG32(SQ_CONFIG, (VC_ENABLE |
			   EXPORT_SRC_C |
			   GFX_PRIO(0) |
			   CS1_PRIO(0) |
			   CS2_PRIO(1)));
	WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	WREG32(CB_PERF_CTR0_SEL_0, 0);
	WREG32(CB_PERF_CTR0_SEL_1, 0);
	WREG32(CB_PERF_CTR1_SEL_0, 0);
	WREG32(CB_PERF_CTR1_SEL_1, 0);
	WREG32(CB_PERF_CTR2_SEL_0, 0);
	WREG32(CB_PERF_CTR2_SEL_1, 0);
	WREG32(CB_PERF_CTR3_SEL_0, 0);
	WREG32(CB_PERF_CTR3_SEL_1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	udelay(50);
}

/*
 * GART
 */
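
/*
 * cayman_pcie_gart_tlb_flush - flush the HDP cache, then request a TLB
 * invalidate for VM context 0 (bits 0-7 of VM_INVALIDATE_REQUEST select
 * contexts 0-7).
 */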
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-7 are the VM contexts 0-7 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
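
/*
 * cayman_pcie_gart_enable - bring up the PCIE GART.
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache
 * control registers, points VM context 0 at the GTT range and page
 * table, disables contexts 1-7 and flushes the TLB.
 */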
int cayman_pcie_gart_enable(struct radeon_device *rdev)
{
	int r;

	if (rdev->gart.table.vram.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
	/* disable context1-7 */
	WREG32(VM_CONTEXT1_CNTL2, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);

	cayman_pcie_gart_tlb_flush(rdev);
	rdev->gart.ready = true;
	return 0;
}

void cayman_pcie_gart_disable(struct radeon_device *rdev)
{
	int r;

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	if (rdev->gart.table.vram.robj) {
		r = radeon_bo_reserve(rdev->gart.table.vram.robj, false);
		if (likely(r == 0)) {
			radeon_bo_kunmap(rdev->gart.table.vram.robj);
			radeon_bo_unpin(rdev->gart.table.vram.robj);
			radeon_bo_unreserve(rdev->gart.table.vram.robj);
		}
	}
}

void cayman_pcie_gart_fini(struct radeon_device *rdev)
{
	cayman_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}

/*
 * CP.
 */
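
/*
 * cayman_cp_enable - start or halt the command processor.
 *
 * On disable, the active VRAM size is restored to the CPU-visible
 * window and the ME/PFP are halted.
 */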
static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
{
	if (enable) {
		WREG32(CP_ME_CNTL, 0);
	} else {
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
		WREG32(SCRATCH_UMSK, 0);
	}
}
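
/*
 * cayman_cp_load_microcode - load the PFP and ME ucode into the CP.
 *
 * Halts the CP, then streams the big-endian firmware words into the
 * PFP ucode and ME RAM ports and resets the write/read addresses.
 */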
static int cayman_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw)
		return -EINVAL;

	cayman_cp_enable(rdev, false);

	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CAYMAN_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CAYMAN_PM4_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));

	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
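
/*
 * cayman_cp_start - initialize the command processor.
 *
 * Emits ME_INITIALIZE, enables the CP, then emits the default golden
 * register state (clear state) and a few static register writes on
 * ring 0.
 */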
static int cayman_cp_start(struct radeon_device *rdev)
{
	int r, i;

	r = radeon_ring_lock(rdev, 7);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}
	radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5));
	radeon_ring_write(rdev, 0x1);
	radeon_ring_write(rdev, 0x0);
	radeon_ring_write(rdev, rdev->config.cayman.max_hw_contexts - 1);
	radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, 0);
	radeon_ring_unlock_commit(rdev);

	cayman_cp_enable(rdev, true);

	r = radeon_ring_lock(rdev, cayman_default_size + 19);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* setup clear context state */
	radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(rdev, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	for (i = 0; i < cayman_default_size; i++)
		radeon_ring_write(rdev, cayman_default_state[i]);

	radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(rdev, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(rdev, 0);

	/* SQ_VTX_BASE_VTX_LOC */
	radeon_ring_write(rdev, 0xc0026f00);
	radeon_ring_write(rdev, 0x00000000);
	radeon_ring_write(rdev, 0x00000000);
	radeon_ring_write(rdev, 0x00000000);

	/* Clear consts */
	radeon_ring_write(rdev, 0xc0036f00);
	radeon_ring_write(rdev, 0x00000bc4);
	radeon_ring_write(rdev, 0xffffffff);
	radeon_ring_write(rdev, 0xffffffff);
	radeon_ring_write(rdev, 0xffffffff);

	radeon_ring_write(rdev, 0xc0026900);
	radeon_ring_write(rdev, 0x00000316);
	radeon_ring_write(rdev, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(rdev, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev);

	/* XXX init other rings */

	return 0;
}

static void cayman_cp_fini(struct radeon_device *rdev)
{
	cayman_cp_enable(rdev, false);
	radeon_ring_fini(rdev);
}
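
/*
 * cayman_cp_resume - (re)start the three CP ring buffers.
 *
 * Soft-resets the CP and the gfx blocks that depend on it, then for
 * each of ring 0 (gfx+compute) and rings 1/2 (compute only) programs
 * the ring size, read/write pointers, writeback addresses and ring
 * base before starting the rings.  Only ring 0 is ring-tested.
 */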
int cayman_cp_resume(struct radeon_device *rdev)
{
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_SH |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x4);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, (1 << 27));

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	rb_bufsz = drm_order(rdev->cp.ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	WREG32(CP_RB0_WPTR, 0);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, rdev->cp.gpu_addr >> 8);

	rdev->cp.rptr = RREG32(CP_RB0_RPTR);
	rdev->cp.wptr = RREG32(CP_RB0_WPTR);

	/* ring 1 - compute only */
	/* Set ring buffer size */
	rb_bufsz = drm_order(rdev->cp1.ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	WREG32(CP_RB1_WPTR, 0);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, rdev->cp1.gpu_addr >> 8);

	rdev->cp1.rptr = RREG32(CP_RB1_RPTR);
	rdev->cp1.wptr = RREG32(CP_RB1_WPTR);

	/* ring 2 - compute only */
	/* Set ring buffer size */
	rb_bufsz = drm_order(rdev->cp2.ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	WREG32(CP_RB2_WPTR, 0);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, rdev->cp2.gpu_addr >> 8);

	rdev->cp2.rptr = RREG32(CP_RB2_RPTR);
	rdev->cp2.wptr = RREG32(CP_RB2_WPTR);

	/* start the rings */
	cayman_cp_start(rdev);
	rdev->cp.ready = true;
	rdev->cp1.ready = true;
	rdev->cp2.ready = true;
	/* this only tests cp0 */
	r = radeon_ring_test(rdev);
	if (r) {
		rdev->cp.ready = false;
		rdev->cp1.ready = false;
		rdev->cp2.ready = false;
		return r;
	}

	return 0;
}
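
/*
 * cayman_gpu_is_lockup - check whether the GPU is locked up.
 *
 * If the GUI is idle the lockup tracking state is refreshed and false
 * is returned; otherwise a NOP is pushed on the ring and the r100 CP
 * lockup helper decides based on read-pointer progress.
 */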
bool cayman_gpu_is_lockup(struct radeon_device *rdev)
{
	u32 srbm_status;
	u32 grbm_status;
	u32 grbm_status_se0, grbm_status_se1;
	struct r100_gpu_lockup *lockup = &rdev->config.cayman.lockup;
	int r;

	srbm_status = RREG32(SRBM_STATUS);
	grbm_status = RREG32(GRBM_STATUS);
	grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
	grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
	if (!(grbm_status & GUI_ACTIVE)) {
		r100_gpu_lockup_update(lockup, &rdev->cp);
		return false;
	}
	/* force CP activities */
	r = radeon_ring_lock(rdev, 2);
	if (!r) {
		/* PACKET2 NOP */
		radeon_ring_write(rdev, 0x80000000);
		radeon_ring_write(rdev, 0x80000000);
		radeon_ring_unlock_commit(rdev);
	}
	/* XXX deal with CP0,1,2 */
	rdev->cp.rptr = RREG32(CP_RB0_RPTR);
	return r100_gpu_cp_is_lockup(rdev, lockup, &rdev->cp);
}
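
/*
 * cayman_gpu_soft_reset - soft-reset the gfx blocks.
 *
 * Saves MC state, halts the CP, pulses GRBM_SOFT_RESET for all gfx
 * blocks, and restores the MC, logging the GRBM/SRBM status registers
 * before and after the reset.
 */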
static int cayman_gpu_soft_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 grbm_reset = 0;

	if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
		return 0;

	dev_info(rdev->dev, "GPU softreset\n");
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}
	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);

	/* reset all the gfx blocks */
	grbm_reset = (SOFT_RESET_CP |
		      SOFT_RESET_CB |
		      SOFT_RESET_DB |
		      SOFT_RESET_GDS |
		      SOFT_RESET_PA |
		      SOFT_RESET_SC |
		      SOFT_RESET_SPI |
		      SOFT_RESET_SH |
		      SOFT_RESET_SX |
		      SOFT_RESET_TC |
		      SOFT_RESET_TA |
		      SOFT_RESET_VGT |
		      SOFT_RESET_IA);

	dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
	WREG32(GRBM_SOFT_RESET, grbm_reset);
	(void)RREG32(GRBM_SOFT_RESET);
	udelay(50);
	WREG32(GRBM_SOFT_RESET, 0);
	(void)RREG32(GRBM_SOFT_RESET);
	/* Wait a little for things to settle down */
	udelay(50);
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	evergreen_mc_resume(rdev, &save);
	return 0;
}

int cayman_asic_reset(struct radeon_device *rdev)
{
	return cayman_gpu_soft_reset(rdev);
}
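
/*
 * cayman_startup - common hardware bring-up for init and resume.
 *
 * Loads microcode if needed, programs the MC, enables the GART,
 * initializes the gfx engine, blitter, writeback and interrupts, then
 * loads the CP microcode and starts the rings.
 */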
static int cayman_startup(struct radeon_device *rdev)
{
	int r;

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
		r = ni_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}
	r = ni_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	evergreen_mc_program(rdev);
	r = cayman_pcie_gart_enable(rdev);
	if (r)
		return r;
	cayman_gpu_init(rdev);

	r = evergreen_blit_init(rdev);
	if (r) {
		evergreen_blit_fini(rdev);
		rdev->asic->copy = NULL;
		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* Enable IRQ */
	r = r600_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	evergreen_irq_set(rdev);

	r = radeon_ring_init(rdev, rdev->cp.ring_size);
	if (r)
		return r;
	r = cayman_cp_load_microcode(rdev);
	if (r)
		return r;
	r = cayman_cp_resume(rdev);
	if (r)
		return r;

	return 0;
}
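
/*
 * cayman_resume - resume from suspend: re-post the card via the ATOM
 * BIOS tables, rerun cayman_startup() and verify with an IB test.
 */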
int cayman_resume(struct radeon_device *rdev)
{
	int r;

	/* Do not reset the GPU before posting; on rv770 hw, unlike r500 hw,
	 * posting will perform the necessary tasks to bring the GPU back
	 * into good shape.
	 */
	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	r = cayman_startup(rdev);
	if (r) {
		DRM_ERROR("cayman startup failed on resume\n");
		return r;
	}

	r = r600_ib_test(rdev);
	if (r) {
		DRM_ERROR("radeon: failed testing IB (%d).\n", r);
		return r;
	}

	return r;
}
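
/*
 * cayman_suspend - halt the CP, disable interrupts, writeback and the
 * GART, and unpin the blitter shader BO.
 */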
int cayman_suspend(struct radeon_device *rdev)
{
	int r;

	/* FIXME: we should wait for ring to be empty */
	cayman_cp_enable(rdev, false);
	rdev->cp.ready = false;
	evergreen_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cayman_pcie_gart_disable(rdev);

	/* unpin shaders bo */
	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
	if (likely(r == 0)) {
		radeon_bo_unpin(rdev->r600_blit.shader_obj);
		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
	}

	return 0;
}

/* The plan is to move initialization into this function and use helper
 * functions so that radeon_device_init does little more than call
 * asic-specific functions. This should also allow us to remove a bunch
 * of callback functions like vram_info.
 */
int cayman_init(struct radeon_device *rdev)
{
	int r;

	/* This doesn't do much */
	r = radeon_gem_init(rdev);
	if (r)
		return r;
	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* Initialize scratch registers */
	r600_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	/* initialize memory controller */
	r = evergreen_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	r = radeon_irq_kms_init(rdev);
	if (r)
		return r;

	rdev->cp.ring_obj = NULL;
	r600_ring_init(rdev, 1024 * 1024);

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cayman_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cayman_cp_fini(rdev);
		r600_irq_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cayman_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}
	if (rdev->accel_working) {
		r = radeon_ib_pool_init(rdev);
		if (r) {
			DRM_ERROR("radeon: failed initializing IB pool (%d).\n", r);
			rdev->accel_working = false;
		}
		r = r600_ib_test(rdev);
		if (r) {
			DRM_ERROR("radeon: failed testing IB (%d).\n", r);
			rdev->accel_working = false;
		}
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}

void cayman_fini(struct radeon_device *rdev)
{
	evergreen_blit_fini(rdev);
	cayman_cp_fini(rdev);
	r600_irq_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	cayman_pcie_gart_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}