1/*
2 * Low-Level PCI Express Support for the SH7786
3 *
4 *  Copyright (C) 2009 - 2011  Paul Mundt
5 *
6 * This file is subject to the terms and conditions of the GNU General Public
7 * License.  See the file "COPYING" in the main directory of this archive
8 * for more details.
9 */
10#define pr_fmt(fmt) "PCI: " fmt
11
12#include <linux/pci.h>
13#include <linux/init.h>
14#include <linux/kernel.h>
15#include <linux/io.h>
16#include <linux/async.h>
17#include <linux/delay.h>
18#include <linux/slab.h>
19#include <linux/clk.h>
20#include <linux/sh_clk.h>
21#include "pcie-sh7786.h"
22#include <asm/sizes.h>
23
24struct sh7786_pcie_port {
25	struct pci_channel	*hose;
26	struct clk		*fclk, phy_clk;
27	unsigned int		index;
28	int			endpoint;
29	int			link;
30};
31
/* Number of ports being driven, as returned by the hwops core_init() hook. */
static unsigned int nr_ports;

/* Array of nr_ports port descriptors, allocated in sh7786_pcie_init(). */
static struct sh7786_pcie_port *sh7786_pcie_ports;
34
35static struct sh7786_pcie_hwops {
36	int (*core_init)(void);
37	async_func_ptr *port_init_hw;
38} *sh7786_pcie_hwops;
39
40static struct resource sh7786_pci0_resources[] = {
41	{
42		.name	= "PCIe0 IO",
43		.start	= 0xfd000000,
44		.end	= 0xfd000000 + SZ_8M - 1,
45		.flags	= IORESOURCE_IO,
46	}, {
47		.name	= "PCIe0 MEM 0",
48		.start	= 0xc0000000,
49		.end	= 0xc0000000 + SZ_512M - 1,
50		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
51	}, {
52		.name	= "PCIe0 MEM 1",
53		.start	= 0x10000000,
54		.end	= 0x10000000 + SZ_64M - 1,
55		.flags	= IORESOURCE_MEM,
56	}, {
57		.name	= "PCIe0 MEM 2",
58		.start	= 0xfe100000,
59		.end	= 0xfe100000 + SZ_1M - 1,
60		.flags	= IORESOURCE_MEM,
61	},
62};
63
64static struct resource sh7786_pci1_resources[] = {
65	{
66		.name	= "PCIe1 IO",
67		.start	= 0xfd800000,
68		.end	= 0xfd800000 + SZ_8M - 1,
69		.flags	= IORESOURCE_IO,
70	}, {
71		.name	= "PCIe1 MEM 0",
72		.start	= 0xa0000000,
73		.end	= 0xa0000000 + SZ_512M - 1,
74		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
75	}, {
76		.name	= "PCIe1 MEM 1",
77		.start	= 0x30000000,
78		.end	= 0x30000000 + SZ_256M - 1,
79		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
80	}, {
81		.name	= "PCIe1 MEM 2",
82		.start	= 0xfe300000,
83		.end	= 0xfe300000 + SZ_1M - 1,
84		.flags	= IORESOURCE_MEM,
85	},
86};
87
88static struct resource sh7786_pci2_resources[] = {
89	{
90		.name	= "PCIe2 IO",
91		.start	= 0xfc800000,
92		.end	= 0xfc800000 + SZ_4M - 1,
93		.flags	= IORESOURCE_IO,
94	}, {
95		.name	= "PCIe2 MEM 0",
96		.start	= 0x80000000,
97		.end	= 0x80000000 + SZ_512M - 1,
98		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
99	}, {
100		.name	= "PCIe2 MEM 1",
101		.start	= 0x20000000,
102		.end	= 0x20000000 + SZ_256M - 1,
103		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
104	}, {
105		.name	= "PCIe2 MEM 2",
106		.start	= 0xfcd00000,
107		.end	= 0xfcd00000 + SZ_1M - 1,
108		.flags	= IORESOURCE_MEM,
109	},
110};
111
/* Config space accessors; the definition is not in this part of the file. */
extern struct pci_ops sh7786_pci_ops;

/*
 * Build the pci_channel initializer for port @idx, whose register block
 * lives at @start. The matching resource table is selected by pasting
 * @idx into the sh7786_pci<idx>_resources identifier.
 */
#define DEFINE_CONTROLLER(start, idx)					\
{									\
	.reg_base	= (start),					\
	.resources	= sh7786_pci##idx##_resources,			\
	.nr_resources	= ARRAY_SIZE(sh7786_pci##idx##_resources),	\
	.pci_ops	= &sh7786_pci_ops,				\
	.mem_offset	= 0,						\
	.io_offset	= 0,						\
}
123
124static struct pci_channel sh7786_pci_channels[] = {
125	DEFINE_CONTROLLER(0xfe000000, 0),
126	DEFINE_CONTROLLER(0xfe200000, 1),
127	DEFINE_CONTROLLER(0xfcc00000, 2),
128};
129
130static struct clk fixed_pciexclkp = {
131	.rate = 100000000,	/* 100 MHz reference clock */
132};
133
134static void __devinit sh7786_pci_fixup(struct pci_dev *dev)
135{
136	/*
137	 * Prevent enumeration of root complex resources.
138	 */
139	if (pci_is_root_bus(dev->bus) && dev->devfn == 0) {
140		int i;
141
142		for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
143			dev->resource[i].start	= 0;
144			dev->resource[i].end	= 0;
145			dev->resource[i].flags	= 0;
146		}
147	}
148}
149DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_SH7786,
150			 sh7786_pci_fixup);
151
152static int __init phy_wait_for_ack(struct pci_channel *chan)
153{
154	unsigned int timeout = 100;
155
156	while (timeout--) {
157		if (pci_read_reg(chan, SH4A_PCIEPHYADRR) & (1 << BITS_ACK))
158			return 0;
159
160		udelay(100);
161	}
162
163	return -ETIMEDOUT;
164}
165
166static int __init pci_wait_for_irq(struct pci_channel *chan, unsigned int mask)
167{
168	unsigned int timeout = 100;
169
170	while (timeout--) {
171		if ((pci_read_reg(chan, SH4A_PCIEINTR) & mask) == mask)
172			return 0;
173
174		udelay(100);
175	}
176
177	return -ETIMEDOUT;
178}
179
180static void __init phy_write_reg(struct pci_channel *chan, unsigned int addr,
181				 unsigned int lane, unsigned int data)
182{
183	unsigned long phyaddr;
184
185	phyaddr = (1 << BITS_CMD) + ((lane & 0xf) << BITS_LANE) +
186			((addr & 0xff) << BITS_ADR);
187
188	/* Set write data */
189	pci_write_reg(chan, data, SH4A_PCIEPHYDOUTR);
190	pci_write_reg(chan, phyaddr, SH4A_PCIEPHYADRR);
191
192	phy_wait_for_ack(chan);
193
194	/* Clear command */
195	pci_write_reg(chan, 0, SH4A_PCIEPHYDOUTR);
196	pci_write_reg(chan, 0, SH4A_PCIEPHYADRR);
197
198	phy_wait_for_ack(chan);
199}
200
201static int __init pcie_clk_init(struct sh7786_pcie_port *port)
202{
203	struct pci_channel *chan = port->hose;
204	struct clk *clk;
205	char fclk_name[16];
206	int ret;
207
208	/*
209	 * First register the fixed clock
210	 */
211	ret = clk_register(&fixed_pciexclkp);
212	if (unlikely(ret != 0))
213		return ret;
214
215	/*
216	 * Grab the port's function clock, which the PHY clock depends
217	 * on. clock lookups don't help us much at this point, since no
218	 * dev_id is available this early. Lame.
219	 */
220	snprintf(fclk_name, sizeof(fclk_name), "pcie%d_fck", port->index);
221
222	port->fclk = clk_get(NULL, fclk_name);
223	if (IS_ERR(port->fclk)) {
224		ret = PTR_ERR(port->fclk);
225		goto err_fclk;
226	}
227
228	clk_enable(port->fclk);
229
230	/*
231	 * And now, set up the PHY clock
232	 */
233	clk = &port->phy_clk;
234
235	memset(clk, 0, sizeof(struct clk));
236
237	clk->parent = &fixed_pciexclkp;
238	clk->enable_reg = (void __iomem *)(chan->reg_base + SH4A_PCIEPHYCTLR);
239	clk->enable_bit = BITS_CKE;
240
241	ret = sh_clk_mstp32_register(clk, 1);
242	if (unlikely(ret < 0))
243		goto err_phy;
244
245	return 0;
246
247err_phy:
248	clk_disable(port->fclk);
249	clk_put(port->fclk);
250err_fclk:
251	clk_unregister(&fixed_pciexclkp);
252
253	return ret;
254}
255
256static int __init phy_init(struct sh7786_pcie_port *port)
257{
258	struct pci_channel *chan = port->hose;
259	unsigned int timeout = 100;
260
261	clk_enable(&port->phy_clk);
262
263	/* Initialize the phy */
264	phy_write_reg(chan, 0x60, 0xf, 0x004b008b);
265	phy_write_reg(chan, 0x61, 0xf, 0x00007b41);
266	phy_write_reg(chan, 0x64, 0xf, 0x00ff4f00);
267	phy_write_reg(chan, 0x65, 0xf, 0x09070907);
268	phy_write_reg(chan, 0x66, 0xf, 0x00000010);
269	phy_write_reg(chan, 0x74, 0xf, 0x0007001c);
270	phy_write_reg(chan, 0x79, 0xf, 0x01fc000d);
271	phy_write_reg(chan, 0xb0, 0xf, 0x00000610);
272
273	/* Deassert Standby */
274	phy_write_reg(chan, 0x67, 0x1, 0x00000400);
275
276	/* Disable clock */
277	clk_disable(&port->phy_clk);
278
279	while (timeout--) {
280		if (pci_read_reg(chan, SH4A_PCIEPHYSR))
281			return 0;
282
283		udelay(100);
284	}
285
286	return -ETIMEDOUT;
287}
288
289static void __init pcie_reset(struct sh7786_pcie_port *port)
290{
291	struct pci_channel *chan = port->hose;
292
293	pci_write_reg(chan, 1, SH4A_PCIESRSTR);
294	pci_write_reg(chan, 0, SH4A_PCIETCTLR);
295	pci_write_reg(chan, 0, SH4A_PCIESRSTR);
296	pci_write_reg(chan, 0, SH4A_PCIETXVC0SR);
297}
298
299static int __init pcie_init(struct sh7786_pcie_port *port)
300{
301	struct pci_channel *chan = port->hose;
302	unsigned int data;
303	phys_addr_t memphys;
304	size_t memsize;
305	int ret, i, win;
306
307	/* Begin initialization */
308	pcie_reset(port);
309
310	/*
311	 * Initial header for port config space is type 1, set the device
312	 * class to match. Hardware takes care of propagating the IDSETR
313	 * settings, so there is no need to bother with a quirk.
314	 */
315	pci_write_reg(chan, PCI_CLASS_BRIDGE_PCI << 16, SH4A_PCIEIDSETR1);
316
317	/* Initialize default capabilities. */
318	data = pci_read_reg(chan, SH4A_PCIEEXPCAP0);
319	data &= ~(PCI_EXP_FLAGS_TYPE << 16);
320
321	if (port->endpoint)
322		data |= PCI_EXP_TYPE_ENDPOINT << 20;
323	else
324		data |= PCI_EXP_TYPE_ROOT_PORT << 20;
325
326	data |= PCI_CAP_ID_EXP;
327	pci_write_reg(chan, data, SH4A_PCIEEXPCAP0);
328
329	/* Enable data link layer active state reporting */
330	pci_write_reg(chan, PCI_EXP_LNKCAP_DLLLARC, SH4A_PCIEEXPCAP3);
331
332	/* Enable extended sync and ASPM L0s support */
333	data = pci_read_reg(chan, SH4A_PCIEEXPCAP4);
334	data &= ~PCI_EXP_LNKCTL_ASPMC;
335	data |= PCI_EXP_LNKCTL_ES | 1;
336	pci_write_reg(chan, data, SH4A_PCIEEXPCAP4);
337
338	/* Write out the physical slot number */
339	data = pci_read_reg(chan, SH4A_PCIEEXPCAP5);
340	data &= ~PCI_EXP_SLTCAP_PSN;
341	data |= (port->index + 1) << 19;
342	pci_write_reg(chan, data, SH4A_PCIEEXPCAP5);
343
344	/* Set the completion timer timeout to the maximum 32ms. */
345	data = pci_read_reg(chan, SH4A_PCIETLCTLR);
346	data &= ~0x3f00;
347	data |= 0x32 << 8;
348	pci_write_reg(chan, data, SH4A_PCIETLCTLR);
349
350	/*
351	 * Set fast training sequences to the maximum 255,
352	 * and enable MAC data scrambling.
353	 */
354	data = pci_read_reg(chan, SH4A_PCIEMACCTLR);
355	data &= ~PCIEMACCTLR_SCR_DIS;
356	data |= (0xff << 16);
357	pci_write_reg(chan, data, SH4A_PCIEMACCTLR);
358
359	memphys = __pa(memory_start);
360	memsize = roundup_pow_of_two(memory_end - memory_start);
361
362	/*
363	 * If there's more than 512MB of memory, we need to roll over to
364	 * LAR1/LAMR1.
365	 */
366	if (memsize > SZ_512M) {
367		pci_write_reg(chan, memphys + SZ_512M, SH4A_PCIELAR1);
368		pci_write_reg(chan, ((memsize - SZ_512M) - SZ_256) | 1,
369			      SH4A_PCIELAMR1);
370		memsize = SZ_512M;
371	} else {
372		/*
373		 * Otherwise just zero it out and disable it.
374		 */
375		pci_write_reg(chan, 0, SH4A_PCIELAR1);
376		pci_write_reg(chan, 0, SH4A_PCIELAMR1);
377	}
378
379	/*
380	 * LAR0/LAMR0 covers up to the first 512MB, which is enough to
381	 * cover all of lowmem on most platforms.
382	 */
383	pci_write_reg(chan, memphys, SH4A_PCIELAR0);
384	pci_write_reg(chan, (memsize - SZ_256) | 1, SH4A_PCIELAMR0);
385
386	/* Finish initialization */
387	data = pci_read_reg(chan, SH4A_PCIETCTLR);
388	data |= 0x1;
389	pci_write_reg(chan, data, SH4A_PCIETCTLR);
390
391	/* Let things settle down a bit.. */
392	mdelay(100);
393
394	/* Enable DL_Active Interrupt generation */
395	data = pci_read_reg(chan, SH4A_PCIEDLINTENR);
396	data |= PCIEDLINTENR_DLL_ACT_ENABLE;
397	pci_write_reg(chan, data, SH4A_PCIEDLINTENR);
398
399	/* Disable MAC data scrambling. */
400	data = pci_read_reg(chan, SH4A_PCIEMACCTLR);
401	data |= PCIEMACCTLR_SCR_DIS | (0xff << 16);
402	pci_write_reg(chan, data, SH4A_PCIEMACCTLR);
403
404	/*
405	 * This will timeout if we don't have a link, but we permit the
406	 * port to register anyways in order to support hotplug on future
407	 * hardware.
408	 */
409	ret = pci_wait_for_irq(chan, MASK_INT_TX_CTRL);
410
411	data = pci_read_reg(chan, SH4A_PCIEPCICONF1);
412	data &= ~(PCI_STATUS_DEVSEL_MASK << 16);
413	data |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
414		(PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_FAST) << 16;
415	pci_write_reg(chan, data, SH4A_PCIEPCICONF1);
416
417	pci_write_reg(chan, 0x80888000, SH4A_PCIETXVC0DCTLR);
418	pci_write_reg(chan, 0x00222000, SH4A_PCIERXVC0DCTLR);
419
420	wmb();
421
422	if (ret == 0) {
423		data = pci_read_reg(chan, SH4A_PCIEMACSR);
424		printk(KERN_NOTICE "PCI: PCIe#%d x%d link detected\n",
425		       port->index, (data >> 20) & 0x3f);
426	} else
427		printk(KERN_NOTICE "PCI: PCIe#%d link down\n",
428		       port->index);
429
430	for (i = win = 0; i < chan->nr_resources; i++) {
431		struct resource *res = chan->resources + i;
432		resource_size_t size;
433		u32 mask;
434
435		/*
436		 * We can't use the 32-bit mode windows in legacy 29-bit
437		 * mode, so just skip them entirely.
438		 */
439		if ((res->flags & IORESOURCE_MEM_32BIT) && __in_29bit_mode())
440			continue;
441
442		pci_write_reg(chan, 0x00000000, SH4A_PCIEPTCTLR(win));
443
444		/*
445		 * The PAMR mask is calculated in units of 256kB, which
446		 * keeps things pretty simple.
447		 */
448		size = resource_size(res);
449		mask = (roundup_pow_of_two(size) / SZ_256K) - 1;
450		pci_write_reg(chan, mask << 18, SH4A_PCIEPAMR(win));
451
452		pci_write_reg(chan, upper_32_bits(res->start),
453			      SH4A_PCIEPARH(win));
454		pci_write_reg(chan, lower_32_bits(res->start),
455			      SH4A_PCIEPARL(win));
456
457		mask = MASK_PARE;
458		if (res->flags & IORESOURCE_IO)
459			mask |= MASK_SPC;
460
461		pci_write_reg(chan, mask, SH4A_PCIEPTCTLR(win));
462
463		win++;
464	}
465
466	return 0;
467}
468
469int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
470{
471        return 71;
472}
473
474static int __init sh7786_pcie_core_init(void)
475{
476	/* Return the number of ports */
477	return test_mode_pin(MODE_PIN12) ? 3 : 2;
478}
479
480static void __init sh7786_pcie_init_hw(void *data, async_cookie_t cookie)
481{
482	struct sh7786_pcie_port *port = data;
483	int ret;
484
485	/*
486	 * Check if we are configured in endpoint or root complex mode,
487	 * this is a fixed pin setting that applies to all PCIe ports.
488	 */
489	port->endpoint = test_mode_pin(MODE_PIN11);
490
491	/*
492	 * Setup clocks, needed both for PHY and PCIe registers.
493	 */
494	ret = pcie_clk_init(port);
495	if (unlikely(ret < 0)) {
496		pr_err("clock initialization failed for port#%d\n",
497		       port->index);
498		return;
499	}
500
501	ret = phy_init(port);
502	if (unlikely(ret < 0)) {
503		pr_err("phy initialization failed for port#%d\n",
504		       port->index);
505		return;
506	}
507
508	ret = pcie_init(port);
509	if (unlikely(ret < 0)) {
510		pr_err("core initialization failed for port#%d\n",
511			       port->index);
512		return;
513	}
514
515	/* In the interest of preserving device ordering, synchronize */
516	async_synchronize_cookie(cookie);
517
518	register_pci_controller(port->hose);
519}
520
521static struct sh7786_pcie_hwops sh7786_65nm_pcie_hwops __initdata = {
522	.core_init	= sh7786_pcie_core_init,
523	.port_init_hw	= sh7786_pcie_init_hw,
524};
525
526static int __init sh7786_pcie_init(void)
527{
528	struct clk *platclk;
529	int i;
530
531	printk(KERN_NOTICE "PCI: Starting initialization.\n");
532
533	sh7786_pcie_hwops = &sh7786_65nm_pcie_hwops;
534
535	nr_ports = sh7786_pcie_hwops->core_init();
536	BUG_ON(nr_ports > ARRAY_SIZE(sh7786_pci_channels));
537
538	if (unlikely(nr_ports == 0))
539		return -ENODEV;
540
541	sh7786_pcie_ports = kzalloc(nr_ports * sizeof(struct sh7786_pcie_port),
542				    GFP_KERNEL);
543	if (unlikely(!sh7786_pcie_ports))
544		return -ENOMEM;
545
546	/*
547	 * Fetch any optional platform clock associated with this block.
548	 *
549	 * This is a rather nasty hack for boards with spec-mocking FPGAs
550	 * that have a secondary set of clocks outside of the on-chip
551	 * ones that need to be accounted for before there is any chance
552	 * of touching the existing MSTP bits or CPG clocks.
553	 */
554	platclk = clk_get(NULL, "pcie_plat_clk");
555	if (IS_ERR(platclk)) {
556		/* Sane hardware should probably get a WARN_ON.. */
557		platclk = NULL;
558	}
559
560	clk_enable(platclk);
561
562	printk(KERN_NOTICE "PCI: probing %d ports.\n", nr_ports);
563
564	for (i = 0; i < nr_ports; i++) {
565		struct sh7786_pcie_port *port = sh7786_pcie_ports + i;
566
567		port->index		= i;
568		port->hose		= sh7786_pci_channels + i;
569		port->hose->io_map_base	= port->hose->resources[0].start;
570
571		async_schedule(sh7786_pcie_hwops->port_init_hw, port);
572	}
573
574	async_synchronize_full();
575
576	return 0;
577}
578arch_initcall(sh7786_pcie_init);
579