/* pci-ioda.c revision aa0c033f99d9c32a8dd6b1e07d41caf1fced0e1a */
1/*
2 * Support PCI/PCIe on PowerNV platforms
3 *
4 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#undef DEBUG
13
14#include <linux/kernel.h>
15#include <linux/pci.h>
16#include <linux/delay.h>
17#include <linux/string.h>
18#include <linux/init.h>
19#include <linux/bootmem.h>
20#include <linux/irq.h>
21#include <linux/io.h>
22#include <linux/msi.h>
23
24#include <asm/sections.h>
25#include <asm/io.h>
26#include <asm/prom.h>
27#include <asm/pci-bridge.h>
28#include <asm/machdep.h>
29#include <asm/msi_bitmap.h>
30#include <asm/ppc-pci.h>
31#include <asm/opal.h>
32#include <asm/iommu.h>
33#include <asm/tce.h>
34
35#include "powernv.h"
36#include "pci.h"
37
/*
 * define_pe_printk_level() - generate a printk-style helper bound to one
 * kernel log level.  Each generated function prefixes the message with
 * the PE's identity: the device name when the PE maps a single device
 * (pe->pdev), or "domain:bus" when it maps a bus (pe->pbus).  Returns
 * the printk() return value.  (No comments inside the macro body: a
 * comment after the '\' would break line continuation.)
 */
#define define_pe_printk_level(func, kern_level)		\
static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...)	\
{								\
	struct va_format vaf;					\
	va_list args;						\
	char pfix[32];						\
	int r;							\
								\
	va_start(args, fmt);					\
								\
	vaf.fmt = fmt;						\
	vaf.va = &args;						\
								\
	if (pe->pdev)						\
		strlcpy(pfix, dev_name(&pe->pdev->dev),		\
			sizeof(pfix));				\
	else							\
		sprintf(pfix, "%04x:%02x     ",			\
			pci_domain_nr(pe->pbus),		\
			pe->pbus->number);			\
	r = printk(kern_level "pci %s: [PE# %.3d] %pV",		\
		   pfix, pe->pe_number, &vaf);			\
								\
	va_end(args);						\
								\
	return r;						\
}								\

define_pe_printk_level(pe_err, KERN_ERR);	/* pe_err():  error level   */
define_pe_printk_level(pe_warn, KERN_WARNING);	/* pe_warn(): warning level */
define_pe_printk_level(pe_info, KERN_INFO);	/* pe_info(): info level    */
69
70static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev)
71{
72	struct device_node *np;
73
74	np = pci_device_to_OF_node(dev);
75	if (!np)
76		return NULL;
77	return PCI_DN(np);
78}
79
80static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
81{
82	unsigned long pe;
83
84	do {
85		pe = find_next_zero_bit(phb->ioda.pe_alloc,
86					phb->ioda.total_pe, 0);
87		if (pe >= phb->ioda.total_pe)
88			return IODA_INVALID_PE;
89	} while(test_and_set_bit(pe, phb->ioda.pe_alloc));
90
91	phb->ioda.pe_array[pe].pe_number = pe;
92	return pe;
93}
94
95static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
96{
97	WARN_ON(phb->ioda.pe_array[pe].pdev);
98
99	memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
100	clear_bit(pe, phb->ioda.pe_alloc);
101}
102
103/* Currently those 2 are only used when MSIs are enabled, this will change
104 * but in the meantime, we need to protect them to avoid warnings
105 */
106#ifdef CONFIG_PCI_MSI
107static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
108{
109	struct pci_controller *hose = pci_bus_to_host(dev->bus);
110	struct pnv_phb *phb = hose->private_data;
111	struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
112
113	if (!pdn)
114		return NULL;
115	if (pdn->pe_number == IODA_INVALID_PE)
116		return NULL;
117	return &phb->ioda.pe_array[pdn->pe_number];
118}
119#endif /* CONFIG_PCI_MSI */
120
/*
 * Program the hardware for a freshly allocated PE: the PELT entry, the
 * parents' PELT-V, the reverse RID->PE map and (on IODA1) one MVE.
 * The RID compare masks (bcomp/dcomp/fcomp) depend on whether the PE
 * covers a bus or bus subtree (pe->pbus) or a single device (pe->pdev).
 * Returns 0 on success, -ENXIO when OPAL rejects the PELT mapping.
 */
static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
	struct pci_dev *parent;
	uint8_t bcomp, dcomp, fcomp;
	long rc, rid_end, rid;

	/* Bus validation ? */
	if (pe->pbus) {
		int count;

		/* Bus-based PE: match any device/function on the bus range */
		dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
		fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
		parent = pe->pbus->self;
		if (pe->flags & PNV_IODA_PE_BUS_ALL)
			count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
		else
			count = 1;

		/* Map the covered bus count onto the bus-number compare width */
		switch(count) {
		case  1: bcomp = OpalPciBusAll;		break;
		case  2: bcomp = OpalPciBus7Bits;	break;
		case  4: bcomp = OpalPciBus6Bits;	break;
		case  8: bcomp = OpalPciBus5Bits;	break;
		case 16: bcomp = OpalPciBus4Bits;	break;
		case 32: bcomp = OpalPciBus3Bits;	break;
		default:
			pr_err("%s: Number of subordinate busses %d"
			       " unsupported\n",
			       pci_name(pe->pbus->self), count);
			/* Do an exact match only */
			bcomp = OpalPciBusAll;
		}
		rid_end = pe->rid + (count << 8);
	} else {
		/* Device-based PE: exact bus/device/function match */
		parent = pe->pdev->bus->self;
		bcomp = OpalPciBusAll;
		dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
		fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
		rid_end = pe->rid + 1;
	}

	/* Associate PE in PELT */
	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
			     bcomp, dcomp, fcomp, OPAL_MAP_PE);
	if (rc) {
		pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
		return -ENXIO;
	}
	/* Clear any freeze state left over from a previous incarnation */
	opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
				  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);

	/* Add to all parents PELT-V */
	while (parent) {
		struct pci_dn *pdn = pnv_ioda_get_pdn(parent);
		if (pdn && pdn->pe_number != IODA_INVALID_PE) {
			rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
						pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
			/* XXX What to do in case of error ? */
		}
		parent = parent->bus->self;
	}
	/* Setup reverse map (RID -> PE#) for every RID the PE covers */
	for (rid = pe->rid; rid < rid_end; rid++)
		phb->ioda.pe_rmap[rid] = pe->pe_number;

	/* Setup one MVTs on IODA1 */
	if (phb->type == PNV_PHB_IODA1) {
		/* MVE number mirrors the PE number on IODA1 */
		pe->mve_number = pe->pe_number;
		rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
				      pe->pe_number);
		if (rc) {
			pe_err(pe, "OPAL error %ld setting up MVE %d\n",
			       rc, pe->mve_number);
			pe->mve_number = -1;
		} else {
			rc = opal_pci_set_mve_enable(phb->opal_id,
						     pe->mve_number, OPAL_ENABLE_MVE);
			if (rc) {
				pe_err(pe, "OPAL error %ld enabling MVE %d\n",
				       rc, pe->mve_number);
				pe->mve_number = -1;
			}
		}
	} else if (phb->type == PNV_PHB_IODA2)
		pe->mve_number = 0;

	return 0;
}
209
210static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
211				       struct pnv_ioda_pe *pe)
212{
213	struct pnv_ioda_pe *lpe;
214
215	list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
216		if (lpe->dma_weight < pe->dma_weight) {
217			list_add_tail(&pe->dma_link, &lpe->dma_link);
218			return;
219		}
220	}
221	list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
222}
223
224static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
225{
226	/* This is quite simplistic. The "base" weight of a device
227	 * is 10. 0 means no DMA is to be accounted for it.
228	 */
229
230	/* If it's a bridge, no DMA */
231	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
232		return 0;
233
234	/* Reduce the weight of slow USB controllers */
235	if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
236	    dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
237	    dev->class == PCI_CLASS_SERIAL_USB_EHCI)
238		return 3;
239
240	/* Increase the weight of RAID (includes Obsidian) */
241	if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
242		return 15;
243
244	/* Default */
245	return 10;
246}
247
#if 0
/*
 * Build a PE covering a single PCI function.  Compiled out for now;
 * kept for the upcoming SR-IOV support where VFs will need
 * device-granular PEs.  Returns the PE, or NULL on failure.
 */
static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
	struct pnv_ioda_pe *pe;
	int pe_num;

	if (!pdn) {
		pr_err("%s: Device tree node not associated properly\n",
			   pci_name(dev));
		return NULL;
	}
	/* Already has a PE: nothing to do */
	if (pdn->pe_number != IODA_INVALID_PE)
		return NULL;

	/* PE#0 has been pre-set */
	if (dev->bus->number == 0)
		pe_num = 0;
	else
		pe_num = pnv_ioda_alloc_pe(phb);
	if (pe_num == IODA_INVALID_PE) {
		pr_warning("%s: Not enough PE# available, disabling device\n",
			   pci_name(dev));
		return NULL;
	}

	/* NOTE: We get only one ref to the pci_dev for the pdn, not for the
	 * pointer in the PE data structure, both should be destroyed at the
	 * same time. However, this needs to be looked at more closely again
	 * once we actually start removing things (Hotplug, SR-IOV, ...)
	 *
	 * At some point we want to remove the PDN completely anyways
	 */
	pe = &phb->ioda.pe_array[pe_num];
	pci_dev_get(dev);
	pdn->pcidev = dev;
	pdn->pe_number = pe_num;
	pe->pdev = dev;
	pe->pbus = NULL;
	pe->tce32_seg = -1;	/* no 32-bit DMA segment assigned yet */
	pe->mve_number = -1;
	pe->rid = dev->bus->number << 8 | pdn->devfn;

	pe_info(pe, "Associated device to PE\n");

	if (pnv_ioda_configure_pe(phb, pe)) {
		/* XXX What do we do here ? */
		if (pe_num)
			pnv_ioda_free_pe(phb, pe_num);
		pdn->pe_number = IODA_INVALID_PE;
		pe->pdev = NULL;
		pci_dev_put(dev);
		return NULL;
	}

	/* Assign a DMA weight to the device */
	pe->dma_weight = pnv_ioda_dma_weight(dev);
	if (pe->dma_weight != 0) {
		phb->ioda.dma_weight += pe->dma_weight;
		phb->ioda.dma_pe_count++;
	}

	/* Link the PE */
	pnv_ioda_link_pe_by_weight(phb, pe);

	return pe;
}
#endif /* Useful for SRIOV case */
318
319static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
320{
321	struct pci_dev *dev;
322
323	list_for_each_entry(dev, &bus->devices, bus_list) {
324		struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
325
326		if (pdn == NULL) {
327			pr_warn("%s: No device node associated with device !\n",
328				pci_name(dev));
329			continue;
330		}
331		pci_dev_get(dev);
332		pdn->pcidev = dev;
333		pdn->pe_number = pe->pe_number;
334		pe->dma_weight += pnv_ioda_dma_weight(dev);
335		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
336			pnv_ioda_setup_same_PE(dev->subordinate, pe);
337	}
338}
339
340/*
341 * There're 2 types of PCI bus sensitive PEs: One that is compromised of
342 * single PCI bus. Another one that contains the primary PCI bus and its
343 * subordinate PCI devices and buses. The second type of PE is normally
344 * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports.
345 */
346static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
347{
348	struct pci_controller *hose = pci_bus_to_host(bus);
349	struct pnv_phb *phb = hose->private_data;
350	struct pnv_ioda_pe *pe;
351	int pe_num;
352
353	pe_num = pnv_ioda_alloc_pe(phb);
354	if (pe_num == IODA_INVALID_PE) {
355		pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
356			__func__, pci_domain_nr(bus), bus->number);
357		return;
358	}
359
360	pe = &phb->ioda.pe_array[pe_num];
361	pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
362	pe->pbus = bus;
363	pe->pdev = NULL;
364	pe->tce32_seg = -1;
365	pe->mve_number = -1;
366	pe->rid = bus->busn_res.start << 8;
367	pe->dma_weight = 0;
368
369	if (all)
370		pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
371			bus->busn_res.start, bus->busn_res.end, pe_num);
372	else
373		pe_info(pe, "Secondary bus %d associated with PE#%d\n",
374			bus->busn_res.start, pe_num);
375
376	if (pnv_ioda_configure_pe(phb, pe)) {
377		/* XXX What do we do here ? */
378		if (pe_num)
379			pnv_ioda_free_pe(phb, pe_num);
380		pe->pbus = NULL;
381		return;
382	}
383
384	/* Associate it with all child devices */
385	pnv_ioda_setup_same_PE(bus, pe);
386
387	/* Put PE to the list */
388	list_add_tail(&pe->list, &phb->ioda.pe_list);
389
390	/* Account for one DMA PE if at least one DMA capable device exist
391	 * below the bridge
392	 */
393	if (pe->dma_weight != 0) {
394		phb->ioda.dma_weight += pe->dma_weight;
395		phb->ioda.dma_pe_count++;
396	}
397
398	/* Link the PE */
399	pnv_ioda_link_pe_by_weight(phb, pe);
400}
401
402static void pnv_ioda_setup_PEs(struct pci_bus *bus)
403{
404	struct pci_dev *dev;
405
406	pnv_ioda_setup_bus_PE(bus, 0);
407
408	list_for_each_entry(dev, &bus->devices, bus_list) {
409		if (dev->subordinate) {
410			if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
411				pnv_ioda_setup_bus_PE(dev->subordinate, 1);
412			else
413				pnv_ioda_setup_PEs(dev->subordinate);
414		}
415	}
416}
417
418/*
419 * Configure PEs so that the downstream PCI buses and devices
420 * could have their associated PE#. Unfortunately, we didn't
421 * figure out the way to identify the PLX bridge yet. So we
422 * simply put the PCI bus and the subordinate behind the root
423 * port to PE# here. The game rule here is expected to be changed
424 * as soon as we can detected PLX bridge correctly.
425 */
426static void pnv_pci_ioda_setup_PEs(void)
427{
428	struct pci_controller *hose, *tmp;
429
430	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
431		pnv_ioda_setup_PEs(hose->bus);
432	}
433}
434
/* Per-device DMA setup callback: intentionally empty, see comment below */
static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *dev)
{
	/* We delay DMA setup after we have assigned all PE# */
	/* (done in pnv_pci_ioda_setup_DMA(), run from the pcibios fixup) */
}
439
440static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
441{
442	struct pci_dev *dev;
443
444	list_for_each_entry(dev, &bus->devices, bus_list) {
445		set_iommu_table_base(&dev->dev, &pe->tce32_table);
446		if (dev->subordinate)
447			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
448	}
449}
450
/*
 * Set up the 32-bit DMA (TCE) window for @pe, spanning @segs 256MB
 * segments starting at segment @base: allocate one contiguous TCE
 * table, map each segment through OPAL, then register the resulting
 * Linux iommu table with the PE's device or bus.
 */
static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
				      struct pnv_ioda_pe *pe, unsigned int base,
				      unsigned int segs)
{

	struct page *tce_mem = NULL;
	const __be64 *swinvp;
	struct iommu_table *tbl;
	unsigned int i;
	int64_t rc;
	void *addr;

	/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
#define TCE32_TABLE_SIZE	((0x10000000 / 0x1000) * 8)

	/* XXX FIXME: Handle 64-bit only DMA devices */
	/* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
	/* XXX FIXME: Allocate multi-level tables on PHB3 */

	/* We shouldn't already have a 32-bit DMA associated */
	if (WARN_ON(pe->tce32_seg >= 0))
		return;

	/* Grab a 32-bit TCE table */
	pe->tce32_seg = base;
	pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
		(base << 28), ((base + segs) << 28) - 1);

	/* XXX Currently, we allocate one big contiguous table for the
	 * TCEs. We only really need one chunk per 256M of TCE space
	 * (ie per segment) but that's an optimization for later, it
	 * requires some added smarts with our get/put_tce implementation
	 */
	tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
				   get_order(TCE32_TABLE_SIZE * segs));
	if (!tce_mem) {
		pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
		goto fail;
	}
	addr = page_address(tce_mem);
	memset(addr, 0, TCE32_TABLE_SIZE * segs);

	/* Configure HW: map each 256M segment onto its slice of the table */
	for (i = 0; i < segs; i++) {
		rc = opal_pci_map_pe_dma_window(phb->opal_id,
					      pe->pe_number,
					      base + i, 1,
					      __pa(addr) + TCE32_TABLE_SIZE * i,
					      TCE32_TABLE_SIZE, 0x1000);
		if (rc) {
			pe_err(pe, " Failed to configure 32-bit TCE table,"
			       " err %ld\n", rc);
			goto fail;
		}
	}

	/* Setup linux iommu table */
	tbl = &pe->tce32_table;
	pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
				  base << 28);

	/* OPAL variant of P7IOC SW invalidated TCEs */
	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
	if (swinvp) {
		/* We need a couple more fields -- an address and a data
		 * to or.  Since the bus is only printed out on table free
		 * errors, and on the first pass the data will be a relative
		 * bus number, print that out instead.
		 */
		tbl->it_busno = 0;
		/* it_index holds the ioremapped TCE-kill register address */
		tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE
			| TCE_PCI_SWINV_PAIR;
	}
	iommu_init_table(tbl, phb->hose->node);

	/* Hook the table up to the single device or the whole bus */
	if (pe->pdev)
		set_iommu_table_base(&pe->pdev->dev, tbl);
	else
		pnv_ioda_setup_bus_dma(pe, pe->pbus);

	return;
 fail:
	/* XXX Failure: Try to fallback to 64-bit only ? */
	if (pe->tce32_seg >= 0)
		pe->tce32_seg = -1;
	if (tce_mem)
		__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
}
540
/*
 * Distribute the PHB's 32-bit TCE segments over its DMA-capable PEs.
 * Each PE gets one base segment; residual segments are handed out
 * proportionally to DMA weight (pe_dma_list is sorted heaviest-first).
 */
static void pnv_ioda_setup_dma(struct pnv_phb *phb)
{
	struct pci_controller *hose = phb->hose;
	unsigned int residual, remaining, segs, tw, base;
	struct pnv_ioda_pe *pe;

	/* If we have more PE# than segments available, hand out one
	 * per PE until we run out and let the rest fail. If not,
	 * then we assign at least one segment per PE, plus more based
	 * on the amount of devices under that PE
	 */
	if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
		residual = 0;
	else
		residual = phb->ioda.tce32_count -
			phb->ioda.dma_pe_count;

	pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
		hose->global_number, phb->ioda.tce32_count);
	pr_info("PCI: %d PE# for a total weight of %d\n",
		phb->ioda.dma_pe_count, phb->ioda.dma_weight);

	/* Walk our PE list and configure their DMA segments, hand them
	 * out one base segment plus any residual segments based on
	 * weight
	 */
	remaining = phb->ioda.tce32_count;
	tw = phb->ioda.dma_weight;
	base = 0;
	list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
		/* PEs with zero weight do no DMA: skip */
		if (!pe->dma_weight)
			continue;
		if (!remaining) {
			pe_warn(pe, "No DMA32 resources available\n");
			continue;
		}
		segs = 1;
		if (residual) {
			/* Weight share of the residual, rounded to nearest */
			segs += ((pe->dma_weight * residual)  + (tw / 2)) / tw;
			if (segs > remaining)
				segs = remaining;
		}
		pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
			pe->dma_weight, segs);
		pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
		remaining -= segs;
		base += segs;
	}
}
590
591#ifdef CONFIG_PCI_MSI
/*
 * Set up one MSI for @dev: bind the XIVE to the device's PE and ask
 * OPAL for the 32- or 64-bit (per @is_64) MSI address/data pair to
 * program into @msg.  Returns 0 on success, -ENXIO when the device has
 * no PE or no MVE, -EIO on OPAL errors.
 */
static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
				  unsigned int hwirq, unsigned int is_64,
				  struct msi_msg *msg)
{
	struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
	/* XIVE number is relative to this PHB's MSI base */
	unsigned int xive_num = hwirq - phb->msi_base;
	uint64_t addr64;
	uint32_t addr32, data;
	int rc;

	/* No PE assigned ? bail out ... no MSI for you ! */
	if (pe == NULL)
		return -ENXIO;

	/* Check if we have an MVE */
	if (pe->mve_number < 0)
		return -ENXIO;

	/* Assign XIVE to PE */
	rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
	if (rc) {
		pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
			pci_name(dev), rc, xive_num);
		return -EIO;
	}

	if (is_64) {
		rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
				     &addr64, &data);
		if (rc) {
			pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
				pci_name(dev), rc);
			return -EIO;
		}
		msg->address_hi = addr64 >> 32;
		msg->address_lo = addr64 & 0xfffffffful;
	} else {
		rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
				     &addr32, &data);
		if (rc) {
			pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
				pci_name(dev), rc);
			return -EIO;
		}
		msg->address_hi = 0;
		msg->address_lo = addr32;
	}
	msg->data = data;

	pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
		 " address=%x_%08x data=%x PE# %d\n",
		 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
		 msg->address_hi, msg->address_lo, data, pe->pe_number);

	return 0;
}
648
649static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
650{
651	unsigned int count;
652	const __be32 *prop = of_get_property(phb->hose->dn,
653					     "ibm,opal-msi-ranges", NULL);
654	if (!prop) {
655		/* BML Fallback */
656		prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
657	}
658	if (!prop)
659		return;
660
661	phb->msi_base = be32_to_cpup(prop);
662	count = be32_to_cpup(prop + 1);
663	if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
664		pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
665		       phb->hose->global_number);
666		return;
667	}
668
669	phb->msi_setup = pnv_pci_ioda_msi_setup;
670	phb->msi32_support = 1;
671	pr_info("  Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
672		count, phb->msi_base);
673}
674#else
675static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
676#endif /* CONFIG_PCI_MSI */
677
678/*
679 * This function is supposed to be called on basis of PE from top
680 * to bottom style. So the the I/O or MMIO segment assigned to
681 * parent PE could be overrided by its child PEs if necessary.
682 */
/*
 * Map the I/O and M32 segments spanned by @pe's bus windows to the PE
 * number, in both the kernel's segment maps and the OPAL MMIO window
 * tables.  Only bus-based PEs are supported for now (BUG otherwise).
 */
static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
				  struct pnv_ioda_pe *pe)
{
	struct pnv_phb *phb = hose->private_data;
	struct pci_bus_region region;
	struct resource *res;
	int i, index;
	int rc;

	/*
	 * NOTE: We only care PCI bus based PE for now. For PCI
	 * device based PE, for example SRIOV sensitive VF should
	 * be figured out later.
	 */
	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));

	pci_bus_for_each_resource(pe->pbus, res, i) {
		/* Skip unused or invalid windows */
		if (!res || !res->flags ||
		    res->start > res->end)
			continue;

		if (res->flags & IORESOURCE_IO) {
			/* Translate to PHB-relative I/O space ... */
			region.start = res->start - phb->ioda.io_pci_base;
			region.end   = res->end - phb->ioda.io_pci_base;
			index = region.start / phb->ioda.io_segsize;

			/* ... and claim every I/O segment the window spans */
			while (index < phb->ioda.total_pe &&
			       region.start <= region.end) {
				phb->ioda.io_segmap[index] = pe->pe_number;
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
					pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
				if (rc != OPAL_SUCCESS) {
					pr_err("%s: OPAL error %d when mapping IO "
					       "segment #%d to PE#%d\n",
					       __func__, rc, index, pe->pe_number);
					break;
				}

				region.start += phb->ioda.io_segsize;
				index++;
			}
		} else if (res->flags & IORESOURCE_MEM) {
			/* Translate to PHB-relative M32 space ... */
			region.start = res->start -
				       hose->pci_mem_offset -
				       phb->ioda.m32_pci_base;
			region.end   = res->end -
				       hose->pci_mem_offset -
				       phb->ioda.m32_pci_base;
			index = region.start / phb->ioda.m32_segsize;

			/* ... and claim every M32 segment the window spans */
			while (index < phb->ioda.total_pe &&
			       region.start <= region.end) {
				phb->ioda.m32_segmap[index] = pe->pe_number;
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
					pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
				if (rc != OPAL_SUCCESS) {
					pr_err("%s: OPAL error %d when mapping M32 "
					       "segment#%d to PE#%d",
					       __func__, rc, index, pe->pe_number);
					break;
				}

				region.start += phb->ioda.m32_segsize;
				index++;
			}
		}
	}
}
751
752static void pnv_pci_ioda_setup_seg(void)
753{
754	struct pci_controller *tmp, *hose;
755	struct pnv_phb *phb;
756	struct pnv_ioda_pe *pe;
757
758	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
759		phb = hose->private_data;
760		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
761			pnv_ioda_setup_pe_seg(hose, pe);
762		}
763	}
764}
765
766static void pnv_pci_ioda_setup_DMA(void)
767{
768	struct pci_controller *hose, *tmp;
769	struct pnv_phb *phb;
770
771	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
772		pnv_ioda_setup_dma(hose->private_data);
773
774		/* Mark the PHB initialization done */
775		phb = hose->private_data;
776		phb->initialized = 1;
777	}
778}
779
/*
 * pcibios fixup: assign PE numbers first, then carve up the I/O and
 * M32 segments, and finally hand out 32-bit DMA (TCE) segments.  The
 * order matters - the later steps walk the PE lists built by the first.
 */
static void pnv_pci_ioda_fixup(void)
{
	pnv_pci_ioda_setup_PEs();
	pnv_pci_ioda_setup_seg();
	pnv_pci_ioda_setup_DMA();
}
786
787/*
788 * Returns the alignment for I/O or memory windows for P2P
789 * bridges. That actually depends on how PEs are segmented.
790 * For now, we return I/O or M32 segment size for PE sensitive
791 * P2P bridges. Otherwise, the default values (4KiB for I/O,
792 * 1MiB for memory) will be returned.
793 *
794 * The current PCI bus might be put into one PE, which was
795 * create against the parent PCI bridge. For that case, we
796 * needn't enlarge the alignment so that we can save some
797 * resources.
798 */
799static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
800						unsigned long type)
801{
802	struct pci_dev *bridge;
803	struct pci_controller *hose = pci_bus_to_host(bus);
804	struct pnv_phb *phb = hose->private_data;
805	int num_pci_bridges = 0;
806
807	bridge = bus->self;
808	while (bridge) {
809		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
810			num_pci_bridges++;
811			if (num_pci_bridges >= 2)
812				return 1;
813		}
814
815		bridge = bridge->bus->self;
816	}
817
818	/* We need support prefetchable memory window later */
819	if (type & IORESOURCE_MEM)
820		return phb->ioda.m32_segsize;
821
822	return phb->ioda.io_segsize;
823}
824
825/* Prevent enabling devices for which we couldn't properly
826 * assign a PE
827 */
828static int pnv_pci_enable_device_hook(struct pci_dev *dev)
829{
830	struct pci_controller *hose = pci_bus_to_host(dev->bus);
831	struct pnv_phb *phb = hose->private_data;
832	struct pci_dn *pdn;
833
834	/* The function is probably called while the PEs have
835	 * not be created yet. For example, resource reassignment
836	 * during PCI probe period. We just skip the check if
837	 * PEs isn't ready.
838	 */
839	if (!phb->initialized)
840		return 0;
841
842	pdn = pnv_ioda_get_pdn(dev);
843	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
844		return -EINVAL;
845
846	return 0;
847}
848
849static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
850			       u32 devfn)
851{
852	return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
853}
854
/*
 * Probe and initialize one IODA PHB described by device-tree node @np.
 * Allocates the pnv_phb and its IODA segment/PE maps from bootmem,
 * parses the OF ranges, wires up the powernv PCI callbacks and installs
 * the pcibios fixup (pnv_pci_ioda_fixup()) that later creates the PEs.
 */
void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
{
	struct pci_controller *hose;
	static int primary = 1;	/* only the first PHB is the "primary" for OF range parsing */
	struct pnv_phb *phb;
	unsigned long size, m32map_off, iomap_off, pemap_off;
	const u64 *prop64;
	const u32 *prop32;
	u64 phb_id;
	void *aux;
	long rc;

	pr_info(" Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);

	prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
	if (!prop64) {
		pr_err("  Missing \"ibm,opal-phbid\" property !\n");
		return;
	}
	phb_id = be64_to_cpup(prop64);
	pr_debug("  PHB-ID  : 0x%016llx\n", phb_id);

	phb = alloc_bootmem(sizeof(struct pnv_phb));
	if (phb) {
		memset(phb, 0, sizeof(struct pnv_phb));
		phb->hose = hose = pcibios_alloc_controller(np);
	}
	if (!phb || !phb->hose) {
		pr_err("PCI: Failed to allocate PCI controller for %s\n",
		       np->full_name);
		return;
	}

	spin_lock_init(&phb->lock);
	/* XXX Use device-tree */
	hose->first_busno = 0;
	hose->last_busno = 0xff;
	hose->private_data = phb;
	phb->opal_id = phb_id;
	phb->type = ioda_type;

	/* Detect specific models for error handling */
	if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
		phb->model = PNV_PHB_MODEL_P7IOC;
	else if (of_device_is_compatible(np, "ibm,p8-pciex"))
		phb->model = PNV_PHB_MODEL_PHB3;
	else
		phb->model = PNV_PHB_MODEL_UNKNOWN;

	/* Parse 32-bit and IO ranges (if any) */
	pci_process_bridge_OF_ranges(phb->hose, np, primary);
	primary = 0;

	/* Get registers */
	phb->regs = of_iomap(np, 0);
	if (phb->regs == NULL)
		pr_err("  Failed to map registers !\n");

	/* Initialize more IODA stuff */
	/* NOTE(review): total_pe is read without be32_to_cpup() - assumes a
	 * big-endian host; confirm if little-endian support is added.
	 */
	prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
	if (!prop32)
		phb->ioda.total_pe = 1;
	else
		phb->ioda.total_pe = *prop32;

	phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
	/* FW has already chopped off the top 64k of M32 space (MSI space);
	 * add it back so the segment math covers the full window.
	 */
	phb->ioda.m32_size += 0x10000;

	phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
	phb->ioda.m32_pci_base = hose->mem_resources[0].start -
		hose->pci_mem_offset;
	phb->ioda.io_size = hose->pci_io_size;
	phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
	phb->ioda.io_pci_base = 0; /* XXX calculate this ? */

	/* Allocate aux data & arrays: one bootmem chunk carved into the
	 * PE-allocation bitmap, the M32 and IO segment maps and the PE array.
	 *
	 * XXX TODO: Don't allocate io segmap on PHB3
	 */
	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
	m32map_off = size;
	size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
	iomap_off = size;
	size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
	pemap_off = size;
	size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
	aux = alloc_bootmem(size);
	memset(aux, 0, size);
	phb->ioda.pe_alloc = aux;
	phb->ioda.m32_segmap = aux + m32map_off;
	phb->ioda.io_segmap = aux + iomap_off;
	phb->ioda.pe_array = aux + pemap_off;
	/* Reserve PE#0 - it is pre-set / used as the fallback PE */
	set_bit(0, phb->ioda.pe_alloc);

	INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
	INIT_LIST_HEAD(&phb->ioda.pe_list);

	/* Calculate how many 32-bit TCE segments we have */
	phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;

	/* Clear unusable m64 */
	hose->mem_resources[1].flags = 0;
	hose->mem_resources[1].start = 0;
	hose->mem_resources[1].end = 0;
	hose->mem_resources[2].flags = 0;
	hose->mem_resources[2].start = 0;
	hose->mem_resources[2].end = 0;

#if 0 /* We should really do that ... */
	rc = opal_pci_set_phb_mem_window(opal->phb_id,
					 window_type,
					 window_num,
					 starting_real_address,
					 starting_pci_address,
					 segment_size);
#endif

	pr_info("  %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n",
		phb->ioda.total_pe,
		phb->ioda.m32_size, phb->ioda.m32_segsize,
		phb->ioda.io_size, phb->ioda.io_segsize);

	phb->hose->ops = &pnv_pci_ops;

	/* Setup RID -> PE mapping function */
	phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;

	/* Setup TCEs */
	phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;

	/* Setup MSI support */
	pnv_pci_init_ioda_msis(phb);

	/*
	 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
	 * to let the PCI core do resource assignment. It's supposed
	 * that the PCI core will do correct I/O and MMIO alignment
	 * for the P2P bridge bars so that each PCI bus (excluding
	 * the child P2P bridges) can form individual PE.
	 */
	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
	pci_add_flags(PCI_REASSIGN_ALL_RSRC);

	/* Reset IODA tables to a clean state */
	rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
	if (rc)
		pr_warning("  OPAL Error %ld performing IODA table reset !\n", rc);

	/*
	 * On IODA1 map everything to PE#0, on IODA2 we assume the IODA reset
	 * has cleared the RTT which has the same effect
	 */
	if (ioda_type == PNV_PHB_IODA1)
		opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
}
1013
/* Probe an IODA2 (PHB3-style) PHB from its device-tree node */
void pnv_pci_init_ioda2_phb(struct device_node *np)
{
	pnv_pci_init_ioda_phb(np, PNV_PHB_IODA2);
}
1018
1019void __init pnv_pci_init_ioda_hub(struct device_node *np)
1020{
1021	struct device_node *phbn;
1022	const u64 *prop64;
1023	u64 hub_id;
1024
1025	pr_info("Probing IODA IO-Hub %s\n", np->full_name);
1026
1027	prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
1028	if (!prop64) {
1029		pr_err(" Missing \"ibm,opal-hubid\" property !\n");
1030		return;
1031	}
1032	hub_id = be64_to_cpup(prop64);
1033	pr_devel(" HUB-ID : 0x%016llx\n", hub_id);
1034
1035	/* Count child PHBs */
1036	for_each_child_of_node(np, phbn) {
1037		/* Look for IODA1 PHBs */
1038		if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
1039			pnv_pci_init_ioda_phb(phbn, PNV_PHB_IODA1);
1040	}
1041}
1042