ni.c revision 5596a9db156107b01ceb7db4d50cc091117da627
/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_drm.h"
#include "nid.h"
#include "atom.h"
#include "ni_reg.h"
#include "cayman_blit_shaders.h"

extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
extern void evergreen_mc_program(struct radeon_device *rdev);
extern void evergreen_irq_suspend(struct radeon_device *rdev);
extern int evergreen_mc_init(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev);

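/* Ucode sizes below are in 32-bit dwords; ni_init_microcode() checks each
 * on-disk firmware image against a size of (dwords * 4) bytes.
 */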
#define EVERGREEN_PFP_UCODE_SIZE 1120
#define EVERGREEN_PM4_UCODE_SIZE 1376
#define EVERGREEN_RLC_UCODE_SIZE 768
#define BTC_MC_UCODE_SIZE 6024

#define CAYMAN_PFP_UCODE_SIZE 2176
#define CAYMAN_PM4_UCODE_SIZE 2176
#define CAYMAN_RLC_UCODE_SIZE 1024
#define CAYMAN_MC_UCODE_SIZE 6037

/* Firmware Names */
MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
MODULE_FIRMWARE("radeon/BARTS_me.bin");
MODULE_FIRMWARE("radeon/BARTS_mc.bin");
MODULE_FIRMWARE("radeon/BTC_rlc.bin");
MODULE_FIRMWARE("radeon/TURKS_pfp.bin");
MODULE_FIRMWARE("radeon/TURKS_me.bin");
MODULE_FIRMWARE("radeon/TURKS_mc.bin");
MODULE_FIRMWARE("radeon/CAICOS_pfp.bin");
MODULE_FIRMWARE("radeon/CAICOS_me.bin");
MODULE_FIRMWARE("radeon/CAICOS_mc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_pfp.bin");
MODULE_FIRMWARE("radeon/CAYMAN_me.bin");
MODULE_FIRMWARE("radeon/CAYMAN_mc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_rlc.bin");

#define BTC_IO_MC_REGS_SIZE 29

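/* Per-ASIC MC init tables: {MC_SEQ_IO_DEBUG index, value} pairs that
 * ni_mc_load_microcode() programs before streaming in the MC ucode.
 * The four tables differ only in the final 0x9f entry.
 */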
static const u32 barts_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00946a00}
};

static const u32 turks_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00936a00}
};

static const u32 caicos_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00916a00}
};

static const u32 cayman_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00976b00}
};

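/* ni_mc_load_microcode - upload memory controller ucode.
 *
 * Halts the MC sequencer, writes the per-ASIC IO debug register table,
 * streams the big-endian ucode into MC_SEQ_SUP_PGM, restarts the engine
 * and polls for memory training to complete.  Per the check below this
 * only runs on GDDR5 boards when no ucode is already running.
 */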
int ni_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 mem_type, running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BARTS:
		io_mc_regs = (u32 *)&barts_io_mc_regs;
		ucode_size = BTC_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	case CHIP_TURKS:
		io_mc_regs = (u32 *)&turks_io_mc_regs;
		ucode_size = BTC_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	case CHIP_CAICOS:
	default:
		io_mc_regs = (u32 *)&caicos_io_mc_regs;
		ucode_size = BTC_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	case CHIP_CAYMAN:
		io_mc_regs = (u32 *)&cayman_io_mc_regs;
		ucode_size = CAYMAN_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	}

	mem_type = (RREG32(MC_SEQ_MISC0) & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT;
	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if ((mem_type == MC_SEQ_MISC0_GDDR5_VALUE) && (running == 0)) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_IO_PAD_CNTL_D0) & MEM_FALL_OUT_CMD)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

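/* ni_init_microcode - request PFP, ME, RLC and MC firmware images.
 *
 * Fetches radeon/<chip>_{pfp,me,mc}.bin plus the RLC image for the chip
 * family and validates each blob against its expected size; on failure
 * every firmware reference is released again.
 */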
int ni_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *chip_name;
	const char *rlc_chip_name;
	size_t pfp_req_size, me_req_size, rlc_req_size, mc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	switch (rdev->family) {
	case CHIP_BARTS:
		chip_name = "BARTS";
		rlc_chip_name = "BTC";
		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
		mc_req_size = BTC_MC_UCODE_SIZE * 4;
		break;
	case CHIP_TURKS:
		chip_name = "TURKS";
		rlc_chip_name = "BTC";
		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
		mc_req_size = BTC_MC_UCODE_SIZE * 4;
		break;
	case CHIP_CAICOS:
		chip_name = "CAICOS";
		rlc_chip_name = "BTC";
		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
		mc_req_size = BTC_MC_UCODE_SIZE * 4;
		break;
	case CHIP_CAYMAN:
		chip_name = "CAYMAN";
		rlc_chip_name = "CAYMAN";
		pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
		me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
		rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4;
		mc_req_size = CAYMAN_MC_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "ni_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "ni_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "ni_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
	err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->mc_fw->size != mc_req_size) {
		printk(KERN_ERR
		       "ni_mc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mc_fw->size, fw_name);
		err = -EINVAL;
	}
out:
	platform_device_unregister(pdev);

	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "ni_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
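/* Build the tile-pipe-to-render-backend mapping: clamp the requested
 * pipe/engine/backend counts to the ASIC limits, compute an enable mask
 * for the backends, pick a (possibly swizzled) pipe order, and pack one
 * 4-bit backend index per pipe into the returned GB_BACKEND_MAP value.
 */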
static u32 cayman_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
					       u32 num_tile_pipes,
					       u32 num_backends_per_asic,
					       u32 *backend_disable_mask_per_asic,
					       u32 num_shader_engines)
{
	u32 backend_map = 0;
	u32 enabled_backends_mask = 0;
	u32 enabled_backends_count = 0;
	u32 num_backends_per_se;
	u32 cur_pipe;
	u32 swizzle_pipe[CAYMAN_MAX_PIPES];
	u32 cur_backend = 0;
	u32 i;
	bool force_no_swizzle;

	/* force legal values */
	if (num_tile_pipes < 1)
		num_tile_pipes = 1;
	if (num_tile_pipes > rdev->config.cayman.max_tile_pipes)
		num_tile_pipes = rdev->config.cayman.max_tile_pipes;
	if (num_shader_engines < 1)
		num_shader_engines = 1;
	if (num_shader_engines > rdev->config.cayman.max_shader_engines)
		num_shader_engines = rdev->config.cayman.max_shader_engines;
	if (num_backends_per_asic < num_shader_engines)
		num_backends_per_asic = num_shader_engines;
	if (num_backends_per_asic > (rdev->config.cayman.max_backends_per_se * num_shader_engines))
		num_backends_per_asic = rdev->config.cayman.max_backends_per_se * num_shader_engines;

	/* make sure we have the same number of backends per se */
	num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines);
	/* set up the number of backends per se */
	num_backends_per_se = num_backends_per_asic / num_shader_engines;
	if (num_backends_per_se > rdev->config.cayman.max_backends_per_se) {
		num_backends_per_se = rdev->config.cayman.max_backends_per_se;
		num_backends_per_asic = num_backends_per_se * num_shader_engines;
	}

	/* create enable mask and count for enabled backends */
	for (i = 0; i < CAYMAN_MAX_BACKENDS; ++i) {
		if (((*backend_disable_mask_per_asic >> i) & 1) == 0) {
			enabled_backends_mask |= (1 << i);
			++enabled_backends_count;
		}
		if (enabled_backends_count == num_backends_per_asic)
			break;
	}

	/* force the backends mask to match the current number of backends */
	if (enabled_backends_count != num_backends_per_asic) {
		u32 this_backend_enabled;
		u32 shader_engine;
		u32 backend_per_se;

		enabled_backends_mask = 0;
		enabled_backends_count = 0;
		*backend_disable_mask_per_asic = CAYMAN_MAX_BACKENDS_MASK;
		for (i = 0; i < CAYMAN_MAX_BACKENDS; ++i) {
			/* calc the current se */
			shader_engine = i / rdev->config.cayman.max_backends_per_se;
			/* calc the backend per se */
			backend_per_se = i % rdev->config.cayman.max_backends_per_se;
			/* default to not enabled */
			this_backend_enabled = 0;
			if ((shader_engine < num_shader_engines) &&
			    (backend_per_se < num_backends_per_se))
				this_backend_enabled = 1;
			if (this_backend_enabled) {
				enabled_backends_mask |= (1 << i);
				*backend_disable_mask_per_asic &= ~(1 << i);
				++enabled_backends_count;
			}
		}
	}

	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * CAYMAN_MAX_PIPES);
	switch (rdev->family) {
	case CHIP_CAYMAN:
		force_no_swizzle = true;
		break;
	default:
		force_no_swizzle = false;
		break;
	}
	if (force_no_swizzle) {
		bool last_backend_enabled = false;

		force_no_swizzle = false;
		for (i = 0; i < CAYMAN_MAX_BACKENDS; ++i) {
			if (((enabled_backends_mask >> i) & 1) == 1) {
				if (last_backend_enabled)
					force_no_swizzle = true;
				last_backend_enabled = true;
			} else
				last_backend_enabled = false;
		}
	}

	switch (num_tile_pipes) {
	case 1:
	case 3:
	case 5:
	case 7:
		DRM_ERROR("odd number of pipes!\n");
		break;
	case 2:
		swizzle_pipe[0] = 0;
		swizzle_pipe[1] = 1;
		break;
	case 4:
		if (force_no_swizzle) {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 1;
			swizzle_pipe[2] = 2;
			swizzle_pipe[3] = 3;
		} else {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 2;
			swizzle_pipe[2] = 1;
			swizzle_pipe[3] = 3;
		}
		break;
	case 6:
		if (force_no_swizzle) {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 1;
			swizzle_pipe[2] = 2;
			swizzle_pipe[3] = 3;
			swizzle_pipe[4] = 4;
			swizzle_pipe[5] = 5;
		} else {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 2;
			swizzle_pipe[2] = 4;
			swizzle_pipe[3] = 1;
			swizzle_pipe[4] = 3;
			swizzle_pipe[5] = 5;
		}
		break;
	case 8:
		if (force_no_swizzle) {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 1;
			swizzle_pipe[2] = 2;
			swizzle_pipe[3] = 3;
			swizzle_pipe[4] = 4;
			swizzle_pipe[5] = 5;
			swizzle_pipe[6] = 6;
			swizzle_pipe[7] = 7;
		} else {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 2;
			swizzle_pipe[2] = 4;
			swizzle_pipe[3] = 6;
			swizzle_pipe[4] = 1;
			swizzle_pipe[5] = 3;
			swizzle_pipe[6] = 5;
			swizzle_pipe[7] = 7;
		}
		break;
	}

	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
		while (((1 << cur_backend) & enabled_backends_mask) == 0)
			cur_backend = (cur_backend + 1) % CAYMAN_MAX_BACKENDS;

		backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4)));

		cur_backend = (cur_backend + 1) % CAYMAN_MAX_BACKENDS;
	}

	return backend_map;
}

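/* Replicate the per-shader-engine backend disable mask across all shader
 * engines to form the ASIC-wide mask (Cayman has at most two SEs).
 */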
static u32 cayman_get_disable_mask_per_asic(struct radeon_device *rdev,
					    u32 disable_mask_per_se,
					    u32 max_disable_mask_per_se,
					    u32 num_shader_engines)
{
	u32 disable_field_width_per_se = r600_count_pipe_bits(disable_mask_per_se);
	u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se;

	if (num_shader_engines == 1)
		return disable_mask_per_asic;
	else if (num_shader_engines == 2)
		return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se);
	else
		return 0xffffffff;
}

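/* cayman_gpu_init - one-time graphics block setup.
 *
 * Seeds rdev->config.cayman with the Cayman limits, reads back the
 * harvest/config registers, derives GB_ADDR_CONFIG, the backend map and
 * the tiling config dword, then programs the 3D engine defaults.
 */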
static void cayman_gpu_init(struct radeon_device *rdev)
{
	u32 cc_rb_backend_disable = 0;
	u32 cc_gc_shader_pipe_config;
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 gb_backend_map;
	u32 cgts_tcc_disable;
	u32 sx_debug_1;
	u32 smx_dc_ctl0;
	u32 gc_user_shader_pipe_config;
	u32 gc_user_rb_backend_disable;
	u32 cgts_user_tcc_disable;
	u32 cgts_sm_ctrl_reg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	switch (rdev->family) {
	case CHIP_CAYMAN:
	default:
		rdev->config.cayman.max_shader_engines = 2;
		rdev->config.cayman.max_pipes_per_simd = 4;
		rdev->config.cayman.max_tile_pipes = 8;
		rdev->config.cayman.max_simds_per_se = 12;
		rdev->config.cayman.max_backends_per_se = 4;
		rdev->config.cayman.max_texture_channel_caches = 8;
		rdev->config.cayman.max_gprs = 256;
		rdev->config.cayman.max_threads = 256;
		rdev->config.cayman.max_gs_threads = 32;
		rdev->config.cayman.max_stack_entries = 512;
		rdev->config.cayman.sx_num_of_sets = 8;
		rdev->config.cayman.sx_max_export_size = 256;
		rdev->config.cayman.sx_max_export_pos_size = 64;
		rdev->config.cayman.sx_max_export_smx_size = 192;
		rdev->config.cayman.max_hw_contexts = 8;
		rdev->config.cayman.sq_num_cf_insts = 2;

		rdev->config.cayman.sc_prim_fifo_size = 0x100;
		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE);
	cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG);
	cgts_tcc_disable = 0xff000000;
	gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE);
	gc_user_shader_pipe_config = RREG32(GC_USER_SHADER_PIPE_CONFIG);
	cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE);

	rdev->config.cayman.num_shader_engines = rdev->config.cayman.max_shader_engines;
	tmp = ((~gc_user_shader_pipe_config) & INACTIVE_QD_PIPES_MASK) >> INACTIVE_QD_PIPES_SHIFT;
	rdev->config.cayman.num_shader_pipes_per_simd = r600_count_pipe_bits(tmp);
	rdev->config.cayman.num_tile_pipes = rdev->config.cayman.max_tile_pipes;
	tmp = ((~gc_user_shader_pipe_config) & INACTIVE_SIMDS_MASK) >> INACTIVE_SIMDS_SHIFT;
	rdev->config.cayman.num_simds_per_se = r600_count_pipe_bits(tmp);
	tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
	rdev->config.cayman.num_backends_per_se = r600_count_pipe_bits(tmp);
	tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
	rdev->config.cayman.backend_disable_mask_per_asic =
		cayman_get_disable_mask_per_asic(rdev, tmp, CAYMAN_MAX_BACKENDS_PER_SE_MASK,
						 rdev->config.cayman.num_shader_engines);
	rdev->config.cayman.backend_map =
		cayman_get_tile_pipe_to_backend_map(rdev, rdev->config.cayman.num_tile_pipes,
						    rdev->config.cayman.num_backends_per_se *
						    rdev->config.cayman.num_shader_engines,
						    &rdev->config.cayman.backend_disable_mask_per_asic,
						    rdev->config.cayman.num_shader_engines);
	tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT;
	rdev->config.cayman.num_texture_channel_caches = r600_count_pipe_bits(tmp);
	tmp = (mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT;
	rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
	if (rdev->config.cayman.mem_max_burst_length_bytes > 512)
		rdev->config.cayman.mem_max_burst_length_bytes = 512;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cayman.mem_row_size_in_kb > 4)
		rdev->config.cayman.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cayman.shader_engine_tile_size = 32;
	rdev->config.cayman.num_gpus = 1;
	rdev->config.cayman.multi_gpu_tile_size = 64;

	//gb_addr_config = 0x02011003
#if 0
	gb_addr_config = RREG32(GB_ADDR_CONFIG);
#else
	gb_addr_config = 0;
	switch (rdev->config.cayman.num_tile_pipes) {
	case 1:
	default:
		gb_addr_config |= NUM_PIPES(0);
		break;
	case 2:
		gb_addr_config |= NUM_PIPES(1);
		break;
	case 4:
		gb_addr_config |= NUM_PIPES(2);
		break;
	case 8:
		gb_addr_config |= NUM_PIPES(3);
		break;
	}

	tmp = (rdev->config.cayman.mem_max_burst_length_bytes / 256) - 1;
	gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp);
	gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.cayman.num_shader_engines - 1);
	tmp = (rdev->config.cayman.shader_engine_tile_size / 16) - 1;
	gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp);
	switch (rdev->config.cayman.num_gpus) {
	case 1:
	default:
		gb_addr_config |= NUM_GPUS(0);
		break;
	case 2:
		gb_addr_config |= NUM_GPUS(1);
		break;
	case 4:
		gb_addr_config |= NUM_GPUS(2);
		break;
	}
	switch (rdev->config.cayman.multi_gpu_tile_size) {
	case 16:
		gb_addr_config |= MULTI_GPU_TILE_SIZE(0);
		break;
	case 32:
	default:
		gb_addr_config |= MULTI_GPU_TILE_SIZE(1);
		break;
	case 64:
		gb_addr_config |= MULTI_GPU_TILE_SIZE(2);
		break;
	case 128:
		gb_addr_config |= MULTI_GPU_TILE_SIZE(3);
		break;
	}
	switch (rdev->config.cayman.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}
#endif

	tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
	rdev->config.cayman.num_tile_pipes = (1 << tmp);
	tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
	rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
	tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
	rdev->config.cayman.num_shader_engines = tmp + 1;
	tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
	rdev->config.cayman.num_gpus = tmp + 1;
	tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
	rdev->config.cayman.multi_gpu_tile_size = 1 << tmp;
	tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
	rdev->config.cayman.mem_row_size_in_kb = 1 << tmp;

	//gb_backend_map = 0x76541032;
#if 0
	gb_backend_map = RREG32(GB_BACKEND_MAP);
#else
	gb_backend_map =
		cayman_get_tile_pipe_to_backend_map(rdev, rdev->config.cayman.num_tile_pipes,
						    rdev->config.cayman.num_backends_per_se *
						    rdev->config.cayman.num_shader_engines,
						    &rdev->config.cayman.backend_disable_mask_per_asic,
						    rdev->config.cayman.num_shader_engines);
#endif
	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cayman.tile_config = 0;
	switch (rdev->config.cayman.num_tile_pipes) {
	case 1:
	default:
		rdev->config.cayman.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cayman.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cayman.tile_config |= (2 << 0);
		break;
	case 8:
		rdev->config.cayman.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cayman.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cayman.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cayman.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	rdev->config.cayman.backend_map = gb_backend_map;
	WREG32(GB_BACKEND_MAP, gb_backend_map);
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);

	/* primary versions */
	WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	WREG32(CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);

	WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
	WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable);

	/* user versions */
	WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);

	WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable);
	WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);

	/* reprogram the shader complex */
	cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG);
	for (i = 0; i < 16; i++)
		WREG32(CGTS_SM_CTRL_REG, OVERRIDE);
	WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	sx_debug_1 = RREG32(SX_DEBUG_1);
	sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
	WREG32(SX_DEBUG_1, sx_debug_1);

	smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
	smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff);
	smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets);
	WREG32(SMX_DC_CTL0, smx_dc_ctl0);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE);

	/* need to be explicitly zero-ed */
	WREG32(VGT_OFFCHIP_LDS_BASE, 0);
	WREG32(SQ_LSTMP_RING_BASE, 0);
	WREG32(SQ_HSTMP_RING_BASE, 0);
	WREG32(SQ_ESTMP_RING_BASE, 0);
	WREG32(SQ_GSTMP_RING_BASE, 0);
	WREG32(SQ_VSTMP_RING_BASE, 0);
	WREG32(SQ_PSTMP_RING_BASE, 0);

	WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO);

	WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) |
					POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) |
					SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1)));

	WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) |
				  FETCH_FIFO_HIWATER(0x4) |
				  DONE_FIFO_HIWATER(0xe0) |
				  ALU_UPDATE_FIFO_HIWATER(0x8)));

	WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4));
	WREG32(SQ_CONFIG, (VC_ENABLE |
			   EXPORT_SRC_C |
			   GFX_PRIO(0) |
			   CS1_PRIO(0) |
			   CS2_PRIO(1)));
	WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	WREG32(CB_PERF_CTR0_SEL_0, 0);
	WREG32(CB_PERF_CTR0_SEL_1, 0);
	WREG32(CB_PERF_CTR1_SEL_0, 0);
	WREG32(CB_PERF_CTR1_SEL_1, 0);
	WREG32(CB_PERF_CTR2_SEL_0, 0);
	WREG32(CB_PERF_CTR2_SEL_1, 0);
	WREG32(CB_PERF_CTR3_SEL_0, 0);
	WREG32(CB_PERF_CTR3_SEL_1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	udelay(50);
}

/*
 * GART
 */
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-7 are the VM contexts 0-7 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}

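/* Bring up the PCIE GART: pin the page table in VRAM, set up the L1 TLB
 * and L2 cache, point VM context0 at the GTT range, and leave contexts
 * 1-7 disabled.
 */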
int cayman_pcie_gart_enable(struct radeon_device *rdev)
{
	int r;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
	/* disable context1-7 */
	WREG32(VM_CONTEXT1_CNTL2, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);

	cayman_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}

void cayman_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}

void cayman_pcie_gart_fini(struct radeon_device *rdev)
{
	cayman_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}

/*
 * CP.
 */
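/* Cayman has three CP rings: ring 0 (rdev->cp) handles both gfx and
 * compute, rings 1 and 2 (rdev->cp1/cp2) are compute only.  Only ring 0
 * is exercised by the ring test in cayman_cp_resume() so far.
 */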
static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
		WREG32(SCRATCH_UMSK, 0);
	}
}

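/* Upload the PFP and ME ucode into the CP with the rings halted; the
 * images are stored big-endian and converted with be32_to_cpup() on the
 * way in.
 */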
static int cayman_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw)
		return -EINVAL;

	cayman_cp_enable(rdev, false);

	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CAYMAN_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CAYMAN_PM4_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));

	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}

static int cayman_cp_start(struct radeon_device *rdev)
{
	struct radeon_cp *cp = &rdev->cp;
	int r, i;

	r = radeon_ring_lock(rdev, cp, 7);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}
	radeon_ring_write(cp, PACKET3(PACKET3_ME_INITIALIZE, 5));
	radeon_ring_write(cp, 0x1);
	radeon_ring_write(cp, 0x0);
	radeon_ring_write(cp, rdev->config.cayman.max_hw_contexts - 1);
	radeon_ring_write(cp, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
	radeon_ring_write(cp, 0);
	radeon_ring_write(cp, 0);
	radeon_ring_unlock_commit(rdev, cp);

	cayman_cp_enable(rdev, true);

	r = radeon_ring_lock(rdev, cp, cayman_default_size + 19);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* setup clear context state */
	radeon_ring_write(cp, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(cp, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	for (i = 0; i < cayman_default_size; i++)
		radeon_ring_write(cp, cayman_default_state[i]);

	radeon_ring_write(cp, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(cp, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(cp, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(cp, 0);

	/* SQ_VTX_BASE_VTX_LOC */
	radeon_ring_write(cp, 0xc0026f00);
	radeon_ring_write(cp, 0x00000000);
	radeon_ring_write(cp, 0x00000000);
	radeon_ring_write(cp, 0x00000000);

	/* Clear consts */
	radeon_ring_write(cp, 0xc0036f00);
	radeon_ring_write(cp, 0x00000bc4);
	radeon_ring_write(cp, 0xffffffff);
	radeon_ring_write(cp, 0xffffffff);
	radeon_ring_write(cp, 0xffffffff);

	radeon_ring_write(cp, 0xc0026900);
	radeon_ring_write(cp, 0x00000316);
	radeon_ring_write(cp, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(cp, 0x00000010); /*  */

	radeon_ring_unlock_commit(rdev, cp);

	/* XXX init other rings */

	return 0;
}

static void cayman_cp_fini(struct radeon_device *rdev)
{
	cayman_cp_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->cp);
}

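/* cayman_cp_resume - soft-reset the CP and (re)initialize all three ring
 * buffers: size/endian control, read/write pointers, writeback addresses
 * and ring base, then start the rings and test ring 0.
 */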
int cayman_cp_resume(struct radeon_device *rdev)
{
	struct radeon_cp *cp;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_SH |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, (1 << 27));

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	cp = &rdev->cp;
	rb_bufsz = drm_order(cp->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	cp->wptr = 0;
	WREG32(CP_RB0_WPTR, cp->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, cp->gpu_addr >> 8);

	cp->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	cp = &rdev->cp1;
	rb_bufsz = drm_order(cp->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	cp->wptr = 0;
	WREG32(CP_RB1_WPTR, cp->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, cp->gpu_addr >> 8);

	cp->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	cp = &rdev->cp2;
	rb_bufsz = drm_order(cp->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	cp->wptr = 0;
	WREG32(CP_RB2_WPTR, cp->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, cp->gpu_addr >> 8);

	cp->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	cayman_cp_start(rdev);
	rdev->cp.ready = true;
	rdev->cp1.ready = true;
	rdev->cp2.ready = true;
	/* this only tests cp0 */
	r = radeon_ring_test(rdev, &rdev->cp);
	if (r) {
		rdev->cp.ready = false;
		rdev->cp1.ready = false;
		rdev->cp2.ready = false;
		return r;
	}

	return 0;
}

bool cayman_gpu_is_lockup(struct radeon_device *rdev, struct radeon_cp *cp)
{
	u32 srbm_status;
	u32 grbm_status;
	u32 grbm_status_se0, grbm_status_se1;
	struct r100_gpu_lockup *lockup = &rdev->config.cayman.lockup;
	int r;

	srbm_status = RREG32(SRBM_STATUS);
	grbm_status = RREG32(GRBM_STATUS);
	grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
	grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
	if (!(grbm_status & GUI_ACTIVE)) {
		r100_gpu_lockup_update(lockup, cp);
		return false;
	}
	/* force CP activities */
	r = radeon_ring_lock(rdev, cp, 2);
	if (!r) {
		/* PACKET2 NOP */
		radeon_ring_write(cp, 0x80000000);
		radeon_ring_write(cp, 0x80000000);
		radeon_ring_unlock_commit(rdev, cp);
	}
	/* XXX deal with CP0,1,2 */
	cp->rptr = RREG32(cp->rptr_reg);
	return r100_gpu_cp_is_lockup(rdev, lockup, cp);
}

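/* Soft-reset the gfx blocks through GRBM_SOFT_RESET while the MC is
 * stopped, dumping the GRBM/SRBM status registers before and after to
 * aid debugging.
 */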
static int cayman_gpu_soft_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 grbm_reset = 0;

	if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
		return 0;

	dev_info(rdev->dev, "GPU softreset\n");
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}
	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);

	/* reset all the gfx blocks */
	grbm_reset = (SOFT_RESET_CP |
		      SOFT_RESET_CB |
		      SOFT_RESET_DB |
		      SOFT_RESET_GDS |
		      SOFT_RESET_PA |
		      SOFT_RESET_SC |
		      SOFT_RESET_SPI |
		      SOFT_RESET_SH |
		      SOFT_RESET_SX |
		      SOFT_RESET_TC |
		      SOFT_RESET_TA |
		      SOFT_RESET_VGT |
		      SOFT_RESET_IA);

	dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
	WREG32(GRBM_SOFT_RESET, grbm_reset);
	(void)RREG32(GRBM_SOFT_RESET);
	udelay(50);
	WREG32(GRBM_SOFT_RESET, 0);
	(void)RREG32(GRBM_SOFT_RESET);
	/* Wait a little for things to settle down */
	udelay(50);
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	evergreen_mc_resume(rdev, &save);
	return 0;
}

int cayman_asic_reset(struct radeon_device *rdev)
{
	return cayman_gpu_soft_reset(rdev);
}

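/* cayman_startup - common hw bring-up shared by init and resume:
 * microcode load, MC programming, GART enable, gpu_init, blitter,
 * writeback, IRQs and the CP rings, in that order.
 */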
static int cayman_startup(struct radeon_device *rdev)
{
	struct radeon_cp *cp = &rdev->cp;
	int r;

	/* enable pcie gen2 link */
	evergreen_pcie_gen2_enable(rdev);

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
		r = ni_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}
	r = ni_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	evergreen_mc_program(rdev);
	r = cayman_pcie_gart_enable(rdev);
	if (r)
		return r;
	cayman_gpu_init(rdev);

	r = evergreen_blit_init(rdev);
	if (r) {
		r600_blit_fini(rdev);
		rdev->asic->copy = NULL;
		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* Enable IRQ */
	r = r600_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	evergreen_irq_set(rdev);

	r = radeon_ring_init(rdev, cp, cp->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR);
	if (r)
		return r;
	r = cayman_cp_load_microcode(rdev);
	if (r)
		return r;
	r = cayman_cp_resume(rdev);
	if (r)
		return r;

	return 0;
}

int cayman_resume(struct radeon_device *rdev)
{
	int r;

	/* Do not reset the GPU before posting; on rv770 hw, unlike on r500 hw,
	 * posting will perform the necessary tasks to bring the GPU back into
	 * good shape.
	 */
	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	r = cayman_startup(rdev);
	if (r) {
		DRM_ERROR("cayman startup failed on resume\n");
		return r;
	}

	r = r600_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		DRM_ERROR("radeon: failed testing IB (%d).\n", r);
		return r;
	}

	return r;
}

int cayman_suspend(struct radeon_device *rdev)
{
	/* FIXME: we should wait for the ring to be empty */
	cayman_cp_enable(rdev, false);
	rdev->cp.ready = false;
	evergreen_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cayman_pcie_gart_disable(rdev);
	r600_blit_suspend(rdev);

	return 0;
}

/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more
 * than call asic-specific functions. This should also allow us to
 * remove a bunch of callback functions like vram_info.
 */
int cayman_init(struct radeon_device *rdev)
{
	struct radeon_cp *cp = &rdev->cp;
	int r;

	/* This doesn't do much */
	r = radeon_gem_init(rdev);
	if (r)
		return r;
	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* Initialize scratch registers */
	r600_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev, 3);
	if (r)
		return r;
	/* initialize memory controller */
	r = evergreen_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	r = radeon_irq_kms_init(rdev);
	if (r)
		return r;

	cp->ring_obj = NULL;
	r600_ring_init(rdev, cp, 1024 * 1024);

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cayman_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cayman_cp_fini(rdev);
		r600_irq_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cayman_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}
	if (rdev->accel_working) {
		r = radeon_ib_pool_init(rdev);
		if (r) {
			DRM_ERROR("radeon: failed initializing IB pool (%d).\n", r);
			rdev->accel_working = false;
		}
		r = r600_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX);
		if (r) {
			DRM_ERROR("radeon: failed testing IB (%d).\n", r);
			rdev->accel_working = false;
		}
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}

void cayman_fini(struct radeon_device *rdev)
{
	r600_blit_fini(rdev);
	cayman_cp_fini(rdev);
	r600_irq_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	cayman_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_semaphore_driver_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}