msi.c revision da8d1c8ba4dcb16d60be54b233deca9a7cac98dc
1/*
2 * File:	msi.c
3 * Purpose:	PCI Message Signaled Interrupt (MSI)
4 *
5 * Copyright (C) 2003-2004 Intel
6 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
7 */
8
9#include <linux/err.h>
10#include <linux/mm.h>
11#include <linux/irq.h>
12#include <linux/interrupt.h>
13#include <linux/init.h>
14#include <linux/export.h>
15#include <linux/ioport.h>
16#include <linux/pci.h>
17#include <linux/proc_fs.h>
18#include <linux/msi.h>
19#include <linux/smp.h>
20#include <linux/errno.h>
21#include <linux/io.h>
22#include <linux/slab.h>
23
24#include "pci.h"
25#include "msi.h"
26
/*
 * Global MSI/MSI-X switch.  Starts out non-zero (enabled); pci_no_msi()
 * clears it and pci_msi_enabled() reports it.
 */
static int pci_msi_enable = 1;
28
29/* Arch hooks */
30
#ifndef arch_msi_check_device
/*
 * Fallback used when the architecture does not define
 * arch_msi_check_device: every request is accepted (returns 0).
 */
int arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
{
	return 0;
}
#endif
37
38#ifndef arch_setup_msi_irqs
39# define arch_setup_msi_irqs default_setup_msi_irqs
40# define HAVE_DEFAULT_MSI_SETUP_IRQS
41#endif
42
43#ifdef HAVE_DEFAULT_MSI_SETUP_IRQS
44int default_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
45{
46	struct msi_desc *entry;
47	int ret;
48
49	/*
50	 * If an architecture wants to support multiple MSI, it needs to
51	 * override arch_setup_msi_irqs()
52	 */
53	if (type == PCI_CAP_ID_MSI && nvec > 1)
54		return 1;
55
56	list_for_each_entry(entry, &dev->msi_list, list) {
57		ret = arch_setup_msi_irq(dev, entry);
58		if (ret < 0)
59			return ret;
60		if (ret > 0)
61			return -ENOSPC;
62	}
63
64	return 0;
65}
66#endif
67
68#ifndef arch_teardown_msi_irqs
69# define arch_teardown_msi_irqs default_teardown_msi_irqs
70# define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
71#endif
72
73#ifdef HAVE_DEFAULT_MSI_TEARDOWN_IRQS
74void default_teardown_msi_irqs(struct pci_dev *dev)
75{
76	struct msi_desc *entry;
77
78	list_for_each_entry(entry, &dev->msi_list, list) {
79		int i, nvec;
80		if (entry->irq == 0)
81			continue;
82		nvec = 1 << entry->msi_attrib.multiple;
83		for (i = 0; i < nvec; i++)
84			arch_teardown_msi_irq(entry->irq + i);
85	}
86}
87#endif
88
89static void msi_set_enable(struct pci_dev *dev, int pos, int enable)
90{
91	u16 control;
92
93	BUG_ON(!pos);
94
95	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
96	control &= ~PCI_MSI_FLAGS_ENABLE;
97	if (enable)
98		control |= PCI_MSI_FLAGS_ENABLE;
99	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
100}
101
102static void msix_set_enable(struct pci_dev *dev, int enable)
103{
104	int pos;
105	u16 control;
106
107	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
108	if (pos) {
109		pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control);
110		control &= ~PCI_MSIX_FLAGS_ENABLE;
111		if (enable)
112			control |= PCI_MSIX_FLAGS_ENABLE;
113		pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
114	}
115}
116
/*
 * Build a mask with the low (1 << x) bits set.  For x >= 5 the shift would
 * reach the width of the type, so all 32 bits are returned instead.
 */
static inline __attribute_const__ u32 msi_mask(unsigned x)
{
	return x < 5 ? (1 << (1 << x)) - 1 : 0xffffffff;
}
124
125static inline __attribute_const__ u32 msi_capable_mask(u16 control)
126{
127	return msi_mask((control >> 1) & 7);
128}
129
130static inline __attribute_const__ u32 msi_enabled_mask(u16 control)
131{
132	return msi_mask((control >> 4) & 7);
133}
134
135/*
136 * PCI 2.3 does not specify mask bits for each MSI interrupt.  Attempting to
137 * mask all MSI interrupts by clearing the MSI enable bit does not work
138 * reliably as devices without an INTx disable bit will then generate a
139 * level IRQ which will never be cleared.
140 */
141static u32 __msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
142{
143	u32 mask_bits = desc->masked;
144
145	if (!desc->msi_attrib.maskbit)
146		return 0;
147
148	mask_bits &= ~mask;
149	mask_bits |= flag;
150	pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits);
151
152	return mask_bits;
153}
154
155static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
156{
157	desc->masked = __msi_mask_irq(desc, mask, flag);
158}
159
160/*
161 * This internal function does not flush PCI writes to the device.
162 * All users must ensure that they read from the device before either
163 * assuming that the device state is up to date, or returning out of this
164 * file.  This saves a few milliseconds when initialising devices with lots
165 * of MSI-X interrupts.
166 */
167static u32 __msix_mask_irq(struct msi_desc *desc, u32 flag)
168{
169	u32 mask_bits = desc->masked;
170	unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
171						PCI_MSIX_ENTRY_VECTOR_CTRL;
172	mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
173	if (flag)
174		mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
175	writel(mask_bits, desc->mask_base + offset);
176
177	return mask_bits;
178}
179
180static void msix_mask_irq(struct msi_desc *desc, u32 flag)
181{
182	desc->masked = __msix_mask_irq(desc, flag);
183}
184
185static void msi_set_mask_bit(struct irq_data *data, u32 flag)
186{
187	struct msi_desc *desc = irq_data_get_msi(data);
188
189	if (desc->msi_attrib.is_msix) {
190		msix_mask_irq(desc, flag);
191		readl(desc->mask_base);		/* Flush write to device */
192	} else {
193		unsigned offset = data->irq - desc->dev->irq;
194		msi_mask_irq(desc, 1 << offset, flag << offset);
195	}
196}
197
198void mask_msi_irq(struct irq_data *data)
199{
200	msi_set_mask_bit(data, 1);
201}
202
203void unmask_msi_irq(struct irq_data *data)
204{
205	msi_set_mask_bit(data, 0);
206}
207
208void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
209{
210	BUG_ON(entry->dev->current_state != PCI_D0);
211
212	if (entry->msi_attrib.is_msix) {
213		void __iomem *base = entry->mask_base +
214			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
215
216		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
217		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
218		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
219	} else {
220		struct pci_dev *dev = entry->dev;
221		int pos = entry->msi_attrib.pos;
222		u16 data;
223
224		pci_read_config_dword(dev, msi_lower_address_reg(pos),
225					&msg->address_lo);
226		if (entry->msi_attrib.is_64) {
227			pci_read_config_dword(dev, msi_upper_address_reg(pos),
228						&msg->address_hi);
229			pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
230		} else {
231			msg->address_hi = 0;
232			pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
233		}
234		msg->data = data;
235	}
236}
237
/* Look up the MSI descriptor of @irq and read its message from the device. */
void read_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	__read_msi_msg(irq_get_msi_desc(irq), msg);
}
244
245void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
246{
247	/* Assert that the cache is valid, assuming that
248	 * valid messages are not all-zeroes. */
249	BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
250		 entry->msg.data));
251
252	*msg = entry->msg;
253}
254
/* Look up the MSI descriptor of @irq and return its cached message. */
void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	__get_cached_msi_msg(irq_get_msi_desc(irq), msg);
}
261
262void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
263{
264	if (entry->dev->current_state != PCI_D0) {
265		/* Don't touch the hardware now */
266	} else if (entry->msi_attrib.is_msix) {
267		void __iomem *base;
268		base = entry->mask_base +
269			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
270
271		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
272		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
273		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
274	} else {
275		struct pci_dev *dev = entry->dev;
276		int pos = entry->msi_attrib.pos;
277		u16 msgctl;
278
279		pci_read_config_word(dev, msi_control_reg(pos), &msgctl);
280		msgctl &= ~PCI_MSI_FLAGS_QSIZE;
281		msgctl |= entry->msi_attrib.multiple << 4;
282		pci_write_config_word(dev, msi_control_reg(pos), msgctl);
283
284		pci_write_config_dword(dev, msi_lower_address_reg(pos),
285					msg->address_lo);
286		if (entry->msi_attrib.is_64) {
287			pci_write_config_dword(dev, msi_upper_address_reg(pos),
288						msg->address_hi);
289			pci_write_config_word(dev, msi_data_reg(pos, 1),
290						msg->data);
291		} else {
292			pci_write_config_word(dev, msi_data_reg(pos, 0),
293						msg->data);
294		}
295	}
296	entry->msg = *msg;
297}
298
/* Look up the MSI descriptor of @irq and program @msg for it. */
void write_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	__write_msi_msg(irq_get_msi_desc(irq), msg);
}
305
306static void free_msi_irqs(struct pci_dev *dev)
307{
308	struct msi_desc *entry, *tmp;
309
310	list_for_each_entry(entry, &dev->msi_list, list) {
311		int i, nvec;
312		if (!entry->irq)
313			continue;
314		nvec = 1 << entry->msi_attrib.multiple;
315		for (i = 0; i < nvec; i++)
316			BUG_ON(irq_has_action(entry->irq + i));
317	}
318
319	arch_teardown_msi_irqs(dev);
320
321	list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) {
322		if (entry->msi_attrib.is_msix) {
323			if (list_is_last(&entry->list, &dev->msi_list))
324				iounmap(entry->mask_base);
325		}
326		kobject_del(&entry->kobj);
327		kobject_put(&entry->kobj);
328		list_del(&entry->list);
329		kfree(entry);
330	}
331}
332
333static struct msi_desc *alloc_msi_entry(struct pci_dev *dev)
334{
335	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
336	if (!desc)
337		return NULL;
338
339	INIT_LIST_HEAD(&desc->list);
340	desc->dev = dev;
341
342	return desc;
343}
344
345static void pci_intx_for_msi(struct pci_dev *dev, int enable)
346{
347	if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG))
348		pci_intx(dev, enable);
349}
350
351static void __pci_restore_msi_state(struct pci_dev *dev)
352{
353	int pos;
354	u16 control;
355	struct msi_desc *entry;
356
357	if (!dev->msi_enabled)
358		return;
359
360	entry = irq_get_msi_desc(dev->irq);
361	pos = entry->msi_attrib.pos;
362
363	pci_intx_for_msi(dev, 0);
364	msi_set_enable(dev, pos, 0);
365	write_msi_msg(dev->irq, &entry->msg);
366
367	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
368	msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
369	control &= ~PCI_MSI_FLAGS_QSIZE;
370	control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
371	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
372}
373
374static void __pci_restore_msix_state(struct pci_dev *dev)
375{
376	int pos;
377	struct msi_desc *entry;
378	u16 control;
379
380	if (!dev->msix_enabled)
381		return;
382	BUG_ON(list_empty(&dev->msi_list));
383	entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
384	pos = entry->msi_attrib.pos;
385	pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control);
386
387	/* route the table */
388	pci_intx_for_msi(dev, 0);
389	control |= PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL;
390	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
391
392	list_for_each_entry(entry, &dev->msi_list, list) {
393		write_msi_msg(entry->irq, &entry->msg);
394		msix_mask_irq(entry, entry->masked);
395	}
396
397	control &= ~PCI_MSIX_FLAGS_MASKALL;
398	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
399}
400
/*
 * Restore the MSI and MSI-X programming of @dev from the cached
 * descriptors.  Each helper returns immediately when its mode is not
 * enabled on the device.
 */
void pci_restore_msi_state(struct pci_dev *dev)
{
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);
407
408
/* Map an embedded struct attribute / struct kobject back to its container */
#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)

/*
 * sysfs attribute of an MSI descriptor: the generic attribute plus
 * descriptor-typed show/store callbacks.
 */
struct msi_attribute {
	struct attribute        attr;
	ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
			char *buf);
	ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
			 const char *buf, size_t count);
};
419
420static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr,
421			     char *buf)
422{
423	return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi");
424}
425
426static ssize_t msi_irq_attr_show(struct kobject *kobj,
427				 struct attribute *attr, char *buf)
428{
429	struct msi_attribute *attribute = to_msi_attr(attr);
430	struct msi_desc *entry = to_msi_desc(kobj);
431
432	if (!attribute->show)
433		return -EIO;
434
435	return attribute->show(entry, attribute, buf);
436}
437
/* Only reading is supported for MSI descriptor attributes */
static const struct sysfs_ops msi_irq_sysfs_ops = {
	.show = msi_irq_attr_show,
};

/* Read-only "mode" file backed by show_msi_mode() */
static struct msi_attribute mode_attribute =
	__ATTR(mode, S_IRUGO, show_msi_mode, NULL);


/* NULL-terminated attribute list used as msi_irq_ktype's default_attrs */
struct attribute *msi_irq_default_attrs[] = {
	&mode_attribute.attr,
	NULL
};
450
451void msi_kobj_release(struct kobject *kobj)
452{
453	struct msi_desc *entry = to_msi_desc(kobj);
454
455	pci_dev_put(entry->dev);
456}
457
/* kobject type of the per-descriptor sysfs objects under "msi_irqs" */
static struct kobj_type msi_irq_ktype = {
	.release = msi_kobj_release,
	.sysfs_ops = &msi_irq_sysfs_ops,
	.default_attrs = msi_irq_default_attrs,
};
463
464static int populate_msi_sysfs(struct pci_dev *pdev)
465{
466	struct msi_desc *entry;
467	struct kobject *kobj;
468	int ret;
469	int count = 0;
470
471	pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
472	if (!pdev->msi_kset)
473		return -ENOMEM;
474
475	list_for_each_entry(entry, &pdev->msi_list, list) {
476		kobj = &entry->kobj;
477		kobj->kset = pdev->msi_kset;
478		pci_dev_get(pdev);
479		ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
480				     "%u", entry->irq);
481		if (ret)
482			goto out_unroll;
483
484		count++;
485	}
486
487	return 0;
488
489out_unroll:
490	list_for_each_entry(entry, &pdev->msi_list, list) {
491		if (!count)
492			break;
493		kobject_del(&entry->kobj);
494		kobject_put(&entry->kobj);
495		count--;
496	}
497	return ret;
498}
499
500/**
501 * msi_capability_init - configure device's MSI capability structure
502 * @dev: pointer to the pci_dev data structure of MSI device function
503 * @nvec: number of interrupts to allocate
504 *
505 * Setup the MSI capability structure of the device with the requested
506 * number of interrupts.  A return value of zero indicates the successful
507 * setup of an entry with the new MSI irq.  A negative return value indicates
508 * an error, and a positive return value indicates the number of interrupts
509 * which could have been allocated.
510 */
511static int msi_capability_init(struct pci_dev *dev, int nvec)
512{
513	struct msi_desc *entry;
514	int pos, ret;
515	u16 control;
516	unsigned mask;
517
518	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
519	msi_set_enable(dev, pos, 0);	/* Disable MSI during set up */
520
521	pci_read_config_word(dev, msi_control_reg(pos), &control);
522	/* MSI Entry Initialization */
523	entry = alloc_msi_entry(dev);
524	if (!entry)
525		return -ENOMEM;
526
527	entry->msi_attrib.is_msix	= 0;
528	entry->msi_attrib.is_64		= is_64bit_address(control);
529	entry->msi_attrib.entry_nr	= 0;
530	entry->msi_attrib.maskbit	= is_mask_bit_support(control);
531	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
532	entry->msi_attrib.pos		= pos;
533
534	entry->mask_pos = msi_mask_reg(pos, entry->msi_attrib.is_64);
535	/* All MSIs are unmasked by default, Mask them all */
536	if (entry->msi_attrib.maskbit)
537		pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
538	mask = msi_capable_mask(control);
539	msi_mask_irq(entry, mask, mask);
540
541	list_add_tail(&entry->list, &dev->msi_list);
542
543	/* Configure MSI capability structure */
544	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
545	if (ret) {
546		msi_mask_irq(entry, mask, ~mask);
547		free_msi_irqs(dev);
548		return ret;
549	}
550
551	ret = populate_msi_sysfs(dev);
552	if (ret) {
553		msi_mask_irq(entry, mask, ~mask);
554		free_msi_irqs(dev);
555		return ret;
556	}
557
558	/* Set MSI enabled bits	 */
559	pci_intx_for_msi(dev, 0);
560	msi_set_enable(dev, pos, 1);
561	dev->msi_enabled = 1;
562
563	dev->irq = entry->irq;
564	return 0;
565}
566
567static void __iomem *msix_map_region(struct pci_dev *dev, unsigned pos,
568							unsigned nr_entries)
569{
570	resource_size_t phys_addr;
571	u32 table_offset;
572	u8 bir;
573
574	pci_read_config_dword(dev, msix_table_offset_reg(pos), &table_offset);
575	bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
576	table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
577	phys_addr = pci_resource_start(dev, bir) + table_offset;
578
579	return ioremap_nocache(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
580}
581
582static int msix_setup_entries(struct pci_dev *dev, unsigned pos,
583				void __iomem *base, struct msix_entry *entries,
584				int nvec)
585{
586	struct msi_desc *entry;
587	int i;
588
589	for (i = 0; i < nvec; i++) {
590		entry = alloc_msi_entry(dev);
591		if (!entry) {
592			if (!i)
593				iounmap(base);
594			else
595				free_msi_irqs(dev);
596			/* No enough memory. Don't try again */
597			return -ENOMEM;
598		}
599
600		entry->msi_attrib.is_msix	= 1;
601		entry->msi_attrib.is_64		= 1;
602		entry->msi_attrib.entry_nr	= entries[i].entry;
603		entry->msi_attrib.default_irq	= dev->irq;
604		entry->msi_attrib.pos		= pos;
605		entry->mask_base		= base;
606
607		list_add_tail(&entry->list, &dev->msi_list);
608	}
609
610	return 0;
611}
612
613static void msix_program_entries(struct pci_dev *dev,
614					struct msix_entry *entries)
615{
616	struct msi_desc *entry;
617	int i = 0;
618
619	list_for_each_entry(entry, &dev->msi_list, list) {
620		int offset = entries[i].entry * PCI_MSIX_ENTRY_SIZE +
621						PCI_MSIX_ENTRY_VECTOR_CTRL;
622
623		entries[i].vector = entry->irq;
624		irq_set_msi_desc(entry->irq, entry);
625		entry->masked = readl(entry->mask_base + offset);
626		msix_mask_irq(entry, 1);
627		i++;
628	}
629}
630
631/**
632 * msix_capability_init - configure device's MSI-X capability
633 * @dev: pointer to the pci_dev data structure of MSI-X device function
634 * @entries: pointer to an array of struct msix_entry entries
635 * @nvec: number of @entries
636 *
637 * Setup the MSI-X capability structure of device function with a
638 * single MSI-X irq. A return of zero indicates the successful setup of
639 * requested MSI-X entries with allocated irqs or non-zero for otherwise.
640 **/
641static int msix_capability_init(struct pci_dev *dev,
642				struct msix_entry *entries, int nvec)
643{
644	int pos, ret;
645	u16 control;
646	void __iomem *base;
647
648	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
649	pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control);
650
651	/* Ensure MSI-X is disabled while it is set up */
652	control &= ~PCI_MSIX_FLAGS_ENABLE;
653	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
654
655	/* Request & Map MSI-X table region */
656	base = msix_map_region(dev, pos, multi_msix_capable(control));
657	if (!base)
658		return -ENOMEM;
659
660	ret = msix_setup_entries(dev, pos, base, entries, nvec);
661	if (ret)
662		return ret;
663
664	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
665	if (ret)
666		goto error;
667
668	/*
669	 * Some devices require MSI-X to be enabled before we can touch the
670	 * MSI-X registers.  We need to mask all the vectors to prevent
671	 * interrupts coming in before they're fully set up.
672	 */
673	control |= PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE;
674	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
675
676	msix_program_entries(dev, entries);
677
678	ret = populate_msi_sysfs(dev);
679	if (ret) {
680		ret = 0;
681		goto error;
682	}
683
684	/* Set MSI-X enabled bits and unmask the function */
685	pci_intx_for_msi(dev, 0);
686	dev->msix_enabled = 1;
687
688	control &= ~PCI_MSIX_FLAGS_MASKALL;
689	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
690
691	return 0;
692
693error:
694	if (ret < 0) {
695		/*
696		 * If we had some success, report the number of irqs
697		 * we succeeded in setting up.
698		 */
699		struct msi_desc *entry;
700		int avail = 0;
701
702		list_for_each_entry(entry, &dev->msi_list, list) {
703			if (entry->irq != 0)
704				avail++;
705		}
706		if (avail != 0)
707			ret = avail;
708	}
709
710	free_msi_irqs(dev);
711
712	return ret;
713}
714
715/**
716 * pci_msi_check_device - check whether MSI may be enabled on a device
717 * @dev: pointer to the pci_dev data structure of MSI device function
718 * @nvec: how many MSIs have been requested ?
719 * @type: are we checking for MSI or MSI-X ?
720 *
721 * Look at global flags, the device itself, and its parent busses
722 * to determine if MSI/-X are supported for the device. If MSI/-X is
723 * supported return 0, else return an error code.
724 **/
725static int pci_msi_check_device(struct pci_dev *dev, int nvec, int type)
726{
727	struct pci_bus *bus;
728	int ret;
729
730	/* MSI must be globally enabled and supported by the device */
731	if (!pci_msi_enable || !dev || dev->no_msi)
732		return -EINVAL;
733
734	/*
735	 * You can't ask to have 0 or less MSIs configured.
736	 *  a) it's stupid ..
737	 *  b) the list manipulation code assumes nvec >= 1.
738	 */
739	if (nvec < 1)
740		return -ERANGE;
741
742	/*
743	 * Any bridge which does NOT route MSI transactions from its
744	 * secondary bus to its primary bus must set NO_MSI flag on
745	 * the secondary pci_bus.
746	 * We expect only arch-specific PCI host bus controller driver
747	 * or quirks for specific PCI bridges to be setting NO_MSI.
748	 */
749	for (bus = dev->bus; bus; bus = bus->parent)
750		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
751			return -EINVAL;
752
753	ret = arch_msi_check_device(dev, nvec, type);
754	if (ret)
755		return ret;
756
757	if (!pci_find_capability(dev, type))
758		return -EINVAL;
759
760	return 0;
761}
762
763/**
764 * pci_enable_msi_block - configure device's MSI capability structure
765 * @dev: device to configure
766 * @nvec: number of interrupts to configure
767 *
768 * Allocate IRQs for a device with the MSI capability.
769 * This function returns a negative errno if an error occurs.  If it
770 * is unable to allocate the number of interrupts requested, it returns
771 * the number of interrupts it might be able to allocate.  If it successfully
772 * allocates at least the number of interrupts requested, it returns 0 and
773 * updates the @dev's irq member to the lowest new interrupt number; the
774 * other interrupt numbers allocated to this device are consecutive.
775 */
776int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
777{
778	int status, pos, maxvec;
779	u16 msgctl;
780
781	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
782	if (!pos)
783		return -EINVAL;
784	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
785	maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
786	if (nvec > maxvec)
787		return maxvec;
788
789	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI);
790	if (status)
791		return status;
792
793	WARN_ON(!!dev->msi_enabled);
794
795	/* Check whether driver already requested MSI-X irqs */
796	if (dev->msix_enabled) {
797		dev_info(&dev->dev, "can't enable MSI "
798			 "(MSI-X already enabled)\n");
799		return -EINVAL;
800	}
801
802	status = msi_capability_init(dev, nvec);
803	return status;
804}
805EXPORT_SYMBOL(pci_enable_msi_block);
806
807void pci_msi_shutdown(struct pci_dev *dev)
808{
809	struct msi_desc *desc;
810	u32 mask;
811	u16 ctrl;
812	unsigned pos;
813
814	if (!pci_msi_enable || !dev || !dev->msi_enabled)
815		return;
816
817	BUG_ON(list_empty(&dev->msi_list));
818	desc = list_first_entry(&dev->msi_list, struct msi_desc, list);
819	pos = desc->msi_attrib.pos;
820
821	msi_set_enable(dev, pos, 0);
822	pci_intx_for_msi(dev, 1);
823	dev->msi_enabled = 0;
824
825	/* Return the device with MSI unmasked as initial states */
826	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &ctrl);
827	mask = msi_capable_mask(ctrl);
828	/* Keep cached state to be restored */
829	__msi_mask_irq(desc, mask, ~mask);
830
831	/* Restore dev->irq to its default pin-assertion irq */
832	dev->irq = desc->msi_attrib.default_irq;
833}
834
835void pci_disable_msi(struct pci_dev *dev)
836{
837	if (!pci_msi_enable || !dev || !dev->msi_enabled)
838		return;
839
840	pci_msi_shutdown(dev);
841	free_msi_irqs(dev);
842	kset_unregister(dev->msi_kset);
843	dev->msi_kset = NULL;
844}
845EXPORT_SYMBOL(pci_disable_msi);
846
847/**
848 * pci_msix_table_size - return the number of device's MSI-X table entries
849 * @dev: pointer to the pci_dev data structure of MSI-X device function
850 */
851int pci_msix_table_size(struct pci_dev *dev)
852{
853	int pos;
854	u16 control;
855
856	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
857	if (!pos)
858		return 0;
859
860	pci_read_config_word(dev, msi_control_reg(pos), &control);
861	return multi_msix_capable(control);
862}
863
864/**
865 * pci_enable_msix - configure device's MSI-X capability structure
866 * @dev: pointer to the pci_dev data structure of MSI-X device function
867 * @entries: pointer to an array of MSI-X entries
868 * @nvec: number of MSI-X irqs requested for allocation by device driver
869 *
870 * Setup the MSI-X capability structure of device function with the number
871 * of requested irqs upon its software driver call to request for
872 * MSI-X mode enabled on its hardware device function. A return of zero
873 * indicates the successful configuration of MSI-X capability structure
874 * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
875 * Or a return of > 0 indicates that driver request is exceeding the number
876 * of irqs or MSI-X vectors available. Driver should use the returned value to
877 * re-send its request.
878 **/
879int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
880{
881	int status, nr_entries;
882	int i, j;
883
884	if (!entries)
885		return -EINVAL;
886
887	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSIX);
888	if (status)
889		return status;
890
891	nr_entries = pci_msix_table_size(dev);
892	if (nvec > nr_entries)
893		return nr_entries;
894
895	/* Check for any invalid entries */
896	for (i = 0; i < nvec; i++) {
897		if (entries[i].entry >= nr_entries)
898			return -EINVAL;		/* invalid entry */
899		for (j = i + 1; j < nvec; j++) {
900			if (entries[i].entry == entries[j].entry)
901				return -EINVAL;	/* duplicate entry */
902		}
903	}
904	WARN_ON(!!dev->msix_enabled);
905
906	/* Check whether driver already requested for MSI irq */
907	if (dev->msi_enabled) {
908		dev_info(&dev->dev, "can't enable MSI-X "
909		       "(MSI IRQ already assigned)\n");
910		return -EINVAL;
911	}
912	status = msix_capability_init(dev, entries, nvec);
913	return status;
914}
915EXPORT_SYMBOL(pci_enable_msix);
916
917void pci_msix_shutdown(struct pci_dev *dev)
918{
919	struct msi_desc *entry;
920
921	if (!pci_msi_enable || !dev || !dev->msix_enabled)
922		return;
923
924	/* Return the device with MSI-X masked as initial states */
925	list_for_each_entry(entry, &dev->msi_list, list) {
926		/* Keep cached states to be restored */
927		__msix_mask_irq(entry, 1);
928	}
929
930	msix_set_enable(dev, 0);
931	pci_intx_for_msi(dev, 1);
932	dev->msix_enabled = 0;
933}
934
935void pci_disable_msix(struct pci_dev *dev)
936{
937	if (!pci_msi_enable || !dev || !dev->msix_enabled)
938		return;
939
940	pci_msix_shutdown(dev);
941	free_msi_irqs(dev);
942	kset_unregister(dev->msi_kset);
943	dev->msi_kset = NULL;
944}
945EXPORT_SYMBOL(pci_disable_msix);
946
947/**
948 * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
949 * @dev: pointer to the pci_dev data structure of MSI(X) device function
950 *
951 * Being called during hotplug remove, from which the device function
952 * is hot-removed. All previous assigned MSI/MSI-X irqs, if
953 * allocated for this device function, are reclaimed to unused state,
954 * which may be used later on.
955 **/
956void msi_remove_pci_irq_vectors(struct pci_dev *dev)
957{
958	if (!pci_msi_enable || !dev)
959		return;
960
961	if (dev->msi_enabled || dev->msix_enabled)
962		free_msi_irqs(dev);
963}
964
/* Turn MSI/MSI-X support off globally by clearing pci_msi_enable. */
void pci_no_msi(void)
{
	pci_msi_enable = 0;
}
969
/**
 * pci_msi_enabled - is MSI enabled?
 *
 * Returns true if MSI has not been disabled by the command-line option
 * pci=nomsi.  The value returned is the global pci_msi_enable flag.
 **/
int pci_msi_enabled(void)
{
	return pci_msi_enable;
}
EXPORT_SYMBOL(pci_msi_enabled);
981
/* Give @dev an empty MSI descriptor list. */
void pci_msi_init_pci_dev(struct pci_dev *dev)
{
	INIT_LIST_HEAD(&dev->msi_list);
}
986