1/*
2 * Xen PCI - handle PCI (INTx) and MSI infrastructure calls for PV, HVM and
3 * initial domain support. We also handle the DSDT _PRT callbacks for GSI's
4 * used in HVM and initial domain mode (PV does not parse ACPI, so it has no
 * concept of GSIs). Under PV we hook under the pcibios API for IRQs and
6 * 0xcf8 PCI configuration read/write.
7 *
8 *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
9 *           Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
10 *           Stefano Stabellini <stefano.stabellini@eu.citrix.com>
11 */
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/pci.h>
15#include <linux/acpi.h>
16
17#include <linux/io.h>
18#include <asm/io_apic.h>
19#include <asm/pci_x86.h>
20
21#include <asm/xen/hypervisor.h>
22
23#include <xen/features.h>
24#include <xen/events.h>
25#include <asm/xen/pci.h>
26#include <asm/i8259.h>
27
28static int xen_pcifront_enable_irq(struct pci_dev *dev)
29{
30	int rc;
31	int share = 1;
32	int pirq;
33	u8 gsi;
34
35	rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
36	if (rc < 0) {
37		dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
38			 rc);
39		return rc;
40	}
41	/* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line.*/
42	pirq = gsi;
43
44	if (gsi < nr_legacy_irqs())
45		share = 0;
46
47	rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront");
48	if (rc < 0) {
49		dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n",
50			 gsi, pirq, rc);
51		return rc;
52	}
53
54	dev->irq = rc;
55	dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
56	return 0;
57}
58
59#ifdef CONFIG_ACPI
60static int xen_register_pirq(u32 gsi, int gsi_override, int triggering,
61			     bool set_pirq)
62{
63	int rc, pirq = -1, irq = -1;
64	struct physdev_map_pirq map_irq;
65	int shareable = 0;
66	char *name;
67
68	irq = xen_irq_from_gsi(gsi);
69	if (irq > 0)
70		return irq;
71
72	if (set_pirq)
73		pirq = gsi;
74
75	map_irq.domid = DOMID_SELF;
76	map_irq.type = MAP_PIRQ_TYPE_GSI;
77	map_irq.index = gsi;
78	map_irq.pirq = pirq;
79
80	rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
81	if (rc) {
82		printk(KERN_WARNING "xen map irq failed %d\n", rc);
83		return -1;
84	}
85
86	if (triggering == ACPI_EDGE_SENSITIVE) {
87		shareable = 0;
88		name = "ioapic-edge";
89	} else {
90		shareable = 1;
91		name = "ioapic-level";
92	}
93
94	if (gsi_override >= 0)
95		gsi = gsi_override;
96
97	irq = xen_bind_pirq_gsi_to_irq(gsi, map_irq.pirq, shareable, name);
98	if (irq < 0)
99		goto out;
100
101	printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", map_irq.pirq, irq, gsi);
102out:
103	return irq;
104}
105
106static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
107				     int trigger, int polarity)
108{
109	if (!xen_hvm_domain())
110		return -1;
111
112	return xen_register_pirq(gsi, -1 /* no GSI override */, trigger,
113				 false /* no mapping of GSI to PIRQ */);
114}
115
116#ifdef CONFIG_XEN_DOM0
117static int xen_register_gsi(u32 gsi, int gsi_override, int triggering, int polarity)
118{
119	int rc, irq;
120	struct physdev_setup_gsi setup_gsi;
121
122	if (!xen_pv_domain())
123		return -1;
124
125	printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n",
126			gsi, triggering, polarity);
127
128	irq = xen_register_pirq(gsi, gsi_override, triggering, true);
129
130	setup_gsi.gsi = gsi;
131	setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1);
132	setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
133
134	rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
135	if (rc == -EEXIST)
136		printk(KERN_INFO "Already setup the GSI :%d\n", gsi);
137	else if (rc) {
138		printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n",
139				gsi, rc);
140	}
141
142	return irq;
143}
144
145static int acpi_register_gsi_xen(struct device *dev, u32 gsi,
146				 int trigger, int polarity)
147{
148	return xen_register_gsi(gsi, -1 /* no GSI override */, trigger, polarity);
149}
150#endif
151#endif
152
153#if defined(CONFIG_PCI_MSI)
154#include <linux/msi.h>
155#include <asm/msidef.h>
156
/*
 * PCI frontend operations table.  Nothing in this file assigns it, so it
 * starts out NULL; it is exported (GPL-only) so that code outside this file
 * can set it.
 * NOTE(review): presumably installed by the Xen PCI frontend driver and
 * consumed by the xen_pci_frontend_*() helpers - confirm against
 * <asm/xen/pci.h>.
 */
struct xen_pci_frontend_ops *xen_pci_frontend;
EXPORT_SYMBOL_GPL(xen_pci_frontend);
159
160static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
161{
162	int irq, ret, i;
163	struct msi_desc *msidesc;
164	int *v;
165
166	if (type == PCI_CAP_ID_MSI && nvec > 1)
167		return 1;
168
169	v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL);
170	if (!v)
171		return -ENOMEM;
172
173	if (type == PCI_CAP_ID_MSIX)
174		ret = xen_pci_frontend_enable_msix(dev, v, nvec);
175	else
176		ret = xen_pci_frontend_enable_msi(dev, v);
177	if (ret)
178		goto error;
179	i = 0;
180	list_for_each_entry(msidesc, &dev->msi_list, list) {
181		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i],
182					       (type == PCI_CAP_ID_MSI) ? nvec : 1,
183					       (type == PCI_CAP_ID_MSIX) ?
184					       "pcifront-msi-x" :
185					       "pcifront-msi",
186						DOMID_SELF);
187		if (irq < 0) {
188			ret = irq;
189			goto free;
190		}
191		i++;
192	}
193	kfree(v);
194	return 0;
195
196error:
197	dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
198free:
199	kfree(v);
200	return ret;
201}
202
/*
 * MSI data word used for pirq-backed MSIs: written by xen_msi_compose_msg()
 * and compared against in xen_hvm_setup_msi_irqs() to recognise a message
 * that already encodes a pirq.  The vector field is 0.
 * NOTE(review): (3 << 8) presumably selects the delivery-mode field -
 * confirm against <asm/msidef.h>.
 */
#define XEN_PIRQ_MSI_DATA  (MSI_DATA_TRIGGER_EDGE | \
		MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0))
205
206static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
207		struct msi_msg *msg)
208{
209	/* We set vector == 0 to tell the hypervisor we don't care about it,
210	 * but we want a pirq setup instead.
211	 * We use the dest_id field to pass the pirq that we want. */
212	msg->address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(pirq);
213	msg->address_lo =
214		MSI_ADDR_BASE_LO |
215		MSI_ADDR_DEST_MODE_PHYSICAL |
216		MSI_ADDR_REDIRECTION_CPU |
217		MSI_ADDR_DEST_ID(pirq);
218
219	msg->data = XEN_PIRQ_MSI_DATA;
220}
221
222static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
223{
224	int irq, pirq;
225	struct msi_desc *msidesc;
226	struct msi_msg msg;
227
228	if (type == PCI_CAP_ID_MSI && nvec > 1)
229		return 1;
230
231	list_for_each_entry(msidesc, &dev->msi_list, list) {
232		__read_msi_msg(msidesc, &msg);
233		pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
234			((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
235		if (msg.data != XEN_PIRQ_MSI_DATA ||
236		    xen_irq_from_pirq(pirq) < 0) {
237			pirq = xen_allocate_pirq_msi(dev, msidesc);
238			if (pirq < 0) {
239				irq = -ENODEV;
240				goto error;
241			}
242			xen_msi_compose_msg(dev, pirq, &msg);
243			__write_msi_msg(msidesc, &msg);
244			dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
245		} else {
246			dev_dbg(&dev->dev,
247				"xen: msi already bound to pirq=%d\n", pirq);
248		}
249		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq,
250					       (type == PCI_CAP_ID_MSI) ? nvec : 1,
251					       (type == PCI_CAP_ID_MSIX) ?
252					       "msi-x" : "msi",
253					       DOMID_SELF);
254		if (irq < 0)
255			goto error;
256		dev_dbg(&dev->dev,
257			"xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);
258	}
259	return 0;
260
261error:
262	dev_err(&dev->dev,
263		"Xen PCI frontend has not registered MSI/MSI-X support!\n");
264	return irq;
265}
266
267#ifdef CONFIG_XEN_DOM0
/*
 * Whether the PCI-segment-aware hypercalls are still attempted.  Cleared in
 * xen_initdom_setup_msi_irqs() when the MAP_PIRQ_TYPE_MSI fallback returns
 * something other than -EINVAL, and in xen_initdom_restore_msi_irqs() when
 * PHYSDEVOP_restore_msi_ext returns -ENOSYS.
 */
static bool __read_mostly pci_seg_supported = true;
269
270static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
271{
272	int ret = 0;
273	struct msi_desc *msidesc;
274
275	list_for_each_entry(msidesc, &dev->msi_list, list) {
276		struct physdev_map_pirq map_irq;
277		domid_t domid;
278
279		domid = ret = xen_find_device_domain_owner(dev);
280		/* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED,
281		 * hence check ret value for < 0. */
282		if (ret < 0)
283			domid = DOMID_SELF;
284
285		memset(&map_irq, 0, sizeof(map_irq));
286		map_irq.domid = domid;
287		map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
288		map_irq.index = -1;
289		map_irq.pirq = -1;
290		map_irq.bus = dev->bus->number |
291			      (pci_domain_nr(dev->bus) << 16);
292		map_irq.devfn = dev->devfn;
293
294		if (type == PCI_CAP_ID_MSI && nvec > 1) {
295			map_irq.type = MAP_PIRQ_TYPE_MULTI_MSI;
296			map_irq.entry_nr = nvec;
297		} else if (type == PCI_CAP_ID_MSIX) {
298			int pos;
299			u32 table_offset, bir;
300
301			pos = dev->msix_cap;
302			pci_read_config_dword(dev, pos + PCI_MSIX_TABLE,
303					      &table_offset);
304			bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
305
306			map_irq.table_base = pci_resource_start(dev, bir);
307			map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
308		}
309
310		ret = -EINVAL;
311		if (pci_seg_supported)
312			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
313						    &map_irq);
314		if (type == PCI_CAP_ID_MSI && nvec > 1 && ret) {
315			/*
316			 * If MAP_PIRQ_TYPE_MULTI_MSI is not available
317			 * there's nothing else we can do in this case.
318			 * Just set ret > 0 so driver can retry with
319			 * single MSI.
320			 */
321			ret = 1;
322			goto out;
323		}
324		if (ret == -EINVAL && !pci_domain_nr(dev->bus)) {
325			map_irq.type = MAP_PIRQ_TYPE_MSI;
326			map_irq.index = -1;
327			map_irq.pirq = -1;
328			map_irq.bus = dev->bus->number;
329			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
330						    &map_irq);
331			if (ret != -EINVAL)
332				pci_seg_supported = false;
333		}
334		if (ret) {
335			dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
336				 ret, domid);
337			goto out;
338		}
339
340		ret = xen_bind_pirq_msi_to_irq(dev, msidesc, map_irq.pirq,
341		                               (type == PCI_CAP_ID_MSI) ? nvec : 1,
342		                               (type == PCI_CAP_ID_MSIX) ? "msi-x" : "msi",
343		                               domid);
344		if (ret < 0)
345			goto out;
346	}
347	ret = 0;
348out:
349	return ret;
350}
351
352static void xen_initdom_restore_msi_irqs(struct pci_dev *dev)
353{
354	int ret = 0;
355
356	if (pci_seg_supported) {
357		struct physdev_pci_device restore_ext;
358
359		restore_ext.seg = pci_domain_nr(dev->bus);
360		restore_ext.bus = dev->bus->number;
361		restore_ext.devfn = dev->devfn;
362		ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi_ext,
363					&restore_ext);
364		if (ret == -ENOSYS)
365			pci_seg_supported = false;
366		WARN(ret && ret != -ENOSYS, "restore_msi_ext -> %d\n", ret);
367	}
368	if (!pci_seg_supported) {
369		struct physdev_restore_msi restore;
370
371		restore.bus = dev->bus->number;
372		restore.devfn = dev->devfn;
373		ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore);
374		WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret);
375	}
376}
377#endif
378
379static void xen_teardown_msi_irqs(struct pci_dev *dev)
380{
381	struct msi_desc *msidesc;
382
383	msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
384	if (msidesc->msi_attrib.is_msix)
385		xen_pci_frontend_disable_msix(dev);
386	else
387		xen_pci_frontend_disable_msi(dev);
388
389	/* Free the IRQ's and the msidesc using the generic code. */
390	default_teardown_msi_irqs(dev);
391}
392
/* Tear down a single MSI IRQ by handing it to xen_destroy_irq(). */
static void xen_teardown_msi_irq(unsigned int irq)
{
	xen_destroy_irq(irq);
}
397static u32 xen_nop_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
398{
399	return 0;
400}
401static u32 xen_nop_msix_mask_irq(struct msi_desc *desc, u32 flag)
402{
403	return 0;
404}
405#endif
406
407int __init pci_xen_init(void)
408{
409	if (!xen_pv_domain() || xen_initial_domain())
410		return -ENODEV;
411
412	printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n");
413
414	pcibios_set_cache_line_size();
415
416	pcibios_enable_irq = xen_pcifront_enable_irq;
417	pcibios_disable_irq = NULL;
418
419#ifdef CONFIG_ACPI
420	/* Keep ACPI out of the picture */
421	acpi_noirq = 1;
422#endif
423
424#ifdef CONFIG_PCI_MSI
425	x86_msi.setup_msi_irqs = xen_setup_msi_irqs;
426	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
427	x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs;
428	x86_msi.msi_mask_irq = xen_nop_msi_mask_irq;
429	x86_msi.msix_mask_irq = xen_nop_msix_mask_irq;
430#endif
431	return 0;
432}
433
434int __init pci_xen_hvm_init(void)
435{
436	if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs))
437		return 0;
438
439#ifdef CONFIG_ACPI
440	/*
441	 * We don't want to change the actual ACPI delivery model,
442	 * just how GSIs get registered.
443	 */
444	__acpi_register_gsi = acpi_register_gsi_xen_hvm;
445#endif
446
447#ifdef CONFIG_PCI_MSI
448	x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs;
449	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
450#endif
451	return 0;
452}
453
454#ifdef CONFIG_XEN_DOM0
455static __init void xen_setup_acpi_sci(void)
456{
457	int rc;
458	int trigger, polarity;
459	int gsi = acpi_sci_override_gsi;
460	int irq = -1;
461	int gsi_override = -1;
462
463	if (!gsi)
464		return;
465
466	rc = acpi_get_override_irq(gsi, &trigger, &polarity);
467	if (rc) {
468		printk(KERN_WARNING "xen: acpi_get_override_irq failed for acpi"
469				" sci, rc=%d\n", rc);
470		return;
471	}
472	trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
473	polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
474
475	printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d "
476			"polarity=%d\n", gsi, trigger, polarity);
477
478	/* Before we bind the GSI to a Linux IRQ, check whether
479	 * we need to override it with bus_irq (IRQ) value. Usually for
480	 * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so:
481	 *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level)
482	 * but there are oddballs where the IRQ != GSI:
483	 *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level)
484	 * which ends up being: gsi_to_irq[9] == 20
485	 * (which is what acpi_gsi_to_irq ends up calling when starting the
486	 * the ACPI interpreter and keels over since IRQ 9 has not been
487	 * setup as we had setup IRQ 20 for it).
488	 */
489	if (acpi_gsi_to_irq(gsi, &irq) == 0) {
490		/* Use the provided value if it's valid. */
491		if (irq >= 0)
492			gsi_override = irq;
493	}
494
495	gsi = xen_register_gsi(gsi, gsi_override, trigger, polarity);
496	printk(KERN_INFO "xen: acpi sci %d\n", gsi);
497
498	return;
499}
500
501int __init pci_xen_initial_domain(void)
502{
503	int irq;
504
505#ifdef CONFIG_PCI_MSI
506	x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
507	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
508	x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
509	x86_msi.msi_mask_irq = xen_nop_msi_mask_irq;
510	x86_msi.msix_mask_irq = xen_nop_msix_mask_irq;
511#endif
512	xen_setup_acpi_sci();
513	__acpi_register_gsi = acpi_register_gsi_xen;
514	/* Pre-allocate legacy irqs */
515	for (irq = 0; irq < nr_legacy_irqs(); irq++) {
516		int trigger, polarity;
517
518		if (acpi_get_override_irq(irq, &trigger, &polarity) == -1)
519			continue;
520
521		xen_register_pirq(irq, -1 /* no GSI override */,
522			trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE,
523			true /* Map GSI to PIRQ */);
524	}
525	if (0 == nr_ioapics) {
526		for (irq = 0; irq < nr_legacy_irqs(); irq++)
527			xen_bind_pirq_gsi_to_irq(irq, irq, 0, "xt-pic");
528	}
529	return 0;
530}
531
/* One entry of dev_domain_list: records the domain registered for a device. */
struct xen_device_domain_owner {
	domid_t domain;		/* domain id registered for the device */
	struct pci_dev *dev;	/* device; the key find_device() compares */
	struct list_head list;	/* link in dev_domain_list */
};
537
/* Device -> owning-domain registrations, and the lock that guards them. */
static DEFINE_SPINLOCK(dev_domain_list_spinlock);
static LIST_HEAD(dev_domain_list);
540
541static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
542{
543	struct xen_device_domain_owner *owner;
544
545	list_for_each_entry(owner, &dev_domain_list, list) {
546		if (owner->dev == dev)
547			return owner;
548	}
549	return NULL;
550}
551
552int xen_find_device_domain_owner(struct pci_dev *dev)
553{
554	struct xen_device_domain_owner *owner;
555	int domain = -ENODEV;
556
557	spin_lock(&dev_domain_list_spinlock);
558	owner = find_device(dev);
559	if (owner)
560		domain = owner->domain;
561	spin_unlock(&dev_domain_list_spinlock);
562	return domain;
563}
564EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
565
566int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
567{
568	struct xen_device_domain_owner *owner;
569
570	owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
571	if (!owner)
572		return -ENODEV;
573
574	spin_lock(&dev_domain_list_spinlock);
575	if (find_device(dev)) {
576		spin_unlock(&dev_domain_list_spinlock);
577		kfree(owner);
578		return -EEXIST;
579	}
580	owner->domain = domain;
581	owner->dev = dev;
582	list_add_tail(&owner->list, &dev_domain_list);
583	spin_unlock(&dev_domain_list_spinlock);
584	return 0;
585}
586EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
587
588int xen_unregister_device_domain_owner(struct pci_dev *dev)
589{
590	struct xen_device_domain_owner *owner;
591
592	spin_lock(&dev_domain_list_spinlock);
593	owner = find_device(dev);
594	if (!owner) {
595		spin_unlock(&dev_domain_list_spinlock);
596		return -ENODEV;
597	}
598	list_del(&owner->list);
599	spin_unlock(&dev_domain_list_spinlock);
600	kfree(owner);
601	return 0;
602}
603EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
604#endif
605