/*
 * File:	msi.c
 * Purpose:	PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 */

#include <linux/err.h>
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/msi.h>
#include <linux/smp.h>
#include <linux/errno.h>
#include <linux/io.h>
#include <linux/slab.h>

#include "pci.h"
#include "msi.h"

static int pci_msi_enable = 1;

/* Arch hooks */

#ifndef arch_msi_check_device
int arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
{
	return 0;
}
#endif

#ifndef arch_setup_msi_irqs
# define arch_setup_msi_irqs default_setup_msi_irqs
# define HAVE_DEFAULT_MSI_SETUP_IRQS
#endif

#ifdef HAVE_DEFAULT_MSI_SETUP_IRQS
int default_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	struct msi_desc *entry;
	int ret;

	/*
	 * If an architecture wants to support multiple MSI, it needs to
	 * override arch_setup_msi_irqs()
	 */
	if (type == PCI_CAP_ID_MSI && nvec > 1)
		return 1;

	list_for_each_entry(entry, &dev->msi_list, list) {
		ret = arch_setup_msi_irq(dev, entry);
		if (ret < 0)
			return ret;
		if (ret > 0)
			return -ENOSPC;
	}

	return 0;
}
#endif
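
/*
 * Note (informational, not in the original file): an architecture opts
 * out of this default by defining arch_setup_msi_irqs() itself before
 * this file is built.  The default above falls back to the per-vector
 * arch_setup_msi_irq() hook, which is why it rejects plain MSI with
 * nvec > 1: allocating a naturally aligned block of vectors is an
 * arch-specific job.
 */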

#ifndef arch_teardown_msi_irqs
# define arch_teardown_msi_irqs default_teardown_msi_irqs
# define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
#endif

#ifdef HAVE_DEFAULT_MSI_TEARDOWN_IRQS
void default_teardown_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *entry;

	list_for_each_entry(entry, &dev->msi_list, list) {
		int i, nvec;
		if (entry->irq == 0)
			continue;
		nvec = 1 << entry->msi_attrib.multiple;
		for (i = 0; i < nvec; i++)
			arch_teardown_msi_irq(entry->irq + i);
	}
}
#endif

#ifndef arch_restore_msi_irqs
# define arch_restore_msi_irqs default_restore_msi_irqs
# define HAVE_DEFAULT_MSI_RESTORE_IRQS
#endif

#ifdef HAVE_DEFAULT_MSI_RESTORE_IRQS
void default_restore_msi_irqs(struct pci_dev *dev, int irq)
{
	struct msi_desc *entry;

	entry = NULL;
	if (dev->msix_enabled) {
		list_for_each_entry(entry, &dev->msi_list, list) {
			if (irq == entry->irq)
				break;
		}
	} else if (dev->msi_enabled) {
		entry = irq_get_msi_desc(irq);
	}

	if (entry)
		write_msi_msg(irq, &entry->msg);
}
#endif

static void msi_set_enable(struct pci_dev *dev, int pos, int enable)
{
	u16 control;

	BUG_ON(!pos);

	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
	control &= ~PCI_MSI_FLAGS_ENABLE;
	if (enable)
		control |= PCI_MSI_FLAGS_ENABLE;
	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
}

static void msix_set_enable(struct pci_dev *dev, int enable)
{
	int pos;
	u16 control;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (pos) {
		pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control);
		control &= ~PCI_MSIX_FLAGS_ENABLE;
		if (enable)
			control |= PCI_MSIX_FLAGS_ENABLE;
		pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
	}
}

static inline __attribute_const__ u32 msi_mask(unsigned x)
{
	/* Don't shift by >= width of type */
	if (x >= 5)
		return 0xffffffff;
	return (1 << (1 << x)) - 1;
}

static inline __attribute_const__ u32 msi_capable_mask(u16 control)
{
	return msi_mask((control >> 1) & 7);
}

static inline __attribute_const__ u32 msi_enabled_mask(u16 control)
{
	return msi_mask((control >> 4) & 7);
}

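/*
 * Worked example (informational): the Message Control register encodes
 * vector counts as log2 values -- "Multiple Message Capable" in bits
 * 3:1 and "Multiple Message Enable" in bits 6:4, hence the shifts in
 * the two helpers above.  msi_mask() expands that encoding into a
 * per-vector bitmask:
 *
 *	msi_mask(0) == 0x00000001	(1 vector)
 *	msi_mask(2) == 0x0000000f	(4 vectors)
 *	msi_mask(5) == 0xffffffff	(32 vectors, the PCI maximum)
 */
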
160/*
161 * PCI 2.3 does not specify mask bits for each MSI interrupt.  Attempting to
162 * mask all MSI interrupts by clearing the MSI enable bit does not work
163 * reliably as devices without an INTx disable bit will then generate a
164 * level IRQ which will never be cleared.
165 */
166static u32 __msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
167{
168	u32 mask_bits = desc->masked;
169
170	if (!desc->msi_attrib.maskbit)
171		return 0;
172
173	mask_bits &= ~mask;
174	mask_bits |= flag;
175	pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits);
176
177	return mask_bits;
178}
179
180static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
181{
182	desc->masked = __msi_mask_irq(desc, mask, flag);
183}
184
185/*
186 * This internal function does not flush PCI writes to the device.
187 * All users must ensure that they read from the device before either
188 * assuming that the device state is up to date, or returning out of this
189 * file.  This saves a few milliseconds when initialising devices with lots
190 * of MSI-X interrupts.
191 */
192static u32 __msix_mask_irq(struct msi_desc *desc, u32 flag)
193{
194	u32 mask_bits = desc->masked;
195	unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
196						PCI_MSIX_ENTRY_VECTOR_CTRL;
197	mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
198	if (flag)
199		mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
200	writel(mask_bits, desc->mask_base + offset);
201
202	return mask_bits;
203}
204
205static void msix_mask_irq(struct msi_desc *desc, u32 flag)
206{
207	desc->masked = __msix_mask_irq(desc, flag);
208}
209
210static void msi_set_mask_bit(struct irq_data *data, u32 flag)
211{
212	struct msi_desc *desc = irq_data_get_msi(data);
213
214	if (desc->msi_attrib.is_msix) {
215		msix_mask_irq(desc, flag);
216		readl(desc->mask_base);		/* Flush write to device */
217	} else {
218		unsigned offset = data->irq - desc->dev->irq;
219		msi_mask_irq(desc, 1 << offset, flag << offset);
220	}
221}
222
223void mask_msi_irq(struct irq_data *data)
224{
225	msi_set_mask_bit(data, 1);
226}
227
228void unmask_msi_irq(struct irq_data *data)
229{
230	msi_set_mask_bit(data, 0);
231}
232
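/*
 * Note (informational): MSI masking, when the device supports it at
 * all, goes through a single 32-bit register in config space with one
 * bit per vector -- hence the irq-offset arithmetic in
 * msi_set_mask_bit().  MSI-X masking is per entry in the memory-mapped
 * vector table, so each vector is masked through its own control word
 * and the write must be flushed with a read.
 */
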
233void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
234{
235	BUG_ON(entry->dev->current_state != PCI_D0);
236
237	if (entry->msi_attrib.is_msix) {
238		void __iomem *base = entry->mask_base +
239			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
240
241		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
242		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
243		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
244	} else {
245		struct pci_dev *dev = entry->dev;
246		int pos = entry->msi_attrib.pos;
247		u16 data;
248
249		pci_read_config_dword(dev, msi_lower_address_reg(pos),
250					&msg->address_lo);
251		if (entry->msi_attrib.is_64) {
252			pci_read_config_dword(dev, msi_upper_address_reg(pos),
253						&msg->address_hi);
254			pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
255		} else {
256			msg->address_hi = 0;
257			pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
258		}
259		msg->data = data;
260	}
261}
262
263void read_msi_msg(unsigned int irq, struct msi_msg *msg)
264{
265	struct msi_desc *entry = irq_get_msi_desc(irq);
266
267	__read_msi_msg(entry, msg);
268}
269
270void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
271{
272	/* Assert that the cache is valid, assuming that
273	 * valid messages are not all-zeroes. */
274	BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
275		 entry->msg.data));
276
277	*msg = entry->msg;
278}
279
280void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
281{
282	struct msi_desc *entry = irq_get_msi_desc(irq);
283
284	__get_cached_msi_msg(entry, msg);
285}
286
287void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
288{
289	if (entry->dev->current_state != PCI_D0) {
290		/* Don't touch the hardware now */
291	} else if (entry->msi_attrib.is_msix) {
292		void __iomem *base;
293		base = entry->mask_base +
294			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
295
296		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
297		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
298		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
299	} else {
300		struct pci_dev *dev = entry->dev;
301		int pos = entry->msi_attrib.pos;
302		u16 msgctl;
303
304		pci_read_config_word(dev, msi_control_reg(pos), &msgctl);
305		msgctl &= ~PCI_MSI_FLAGS_QSIZE;
306		msgctl |= entry->msi_attrib.multiple << 4;
307		pci_write_config_word(dev, msi_control_reg(pos), msgctl);
308
309		pci_write_config_dword(dev, msi_lower_address_reg(pos),
310					msg->address_lo);
311		if (entry->msi_attrib.is_64) {
312			pci_write_config_dword(dev, msi_upper_address_reg(pos),
313						msg->address_hi);
314			pci_write_config_word(dev, msi_data_reg(pos, 1),
315						msg->data);
316		} else {
317			pci_write_config_word(dev, msi_data_reg(pos, 0),
318						msg->data);
319		}
320	}
321	entry->msg = *msg;
322}
323
324void write_msi_msg(unsigned int irq, struct msi_msg *msg)
325{
326	struct msi_desc *entry = irq_get_msi_desc(irq);
327
328	__write_msi_msg(entry, msg);
329}
330
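/*
 * Illustrative sketch (arch-specific; x86 shown, values made up): the
 * message that arch code composes and this file stores and replays
 * usually looks something like
 *
 *	msg.address_lo = 0xfee00000 | (dest_apic_id << 12);
 *	msg.address_hi = 0;
 *	msg.data       = vector;
 *
 * i.e. a DMA write of "data" to the local-APIC address window.  Other
 * architectures compose entirely different addresses; this file never
 * interprets the contents.
 */
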
static void free_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *entry, *tmp;

	list_for_each_entry(entry, &dev->msi_list, list) {
		int i, nvec;
		if (!entry->irq)
			continue;
		nvec = 1 << entry->msi_attrib.multiple;
		for (i = 0; i < nvec; i++)
			BUG_ON(irq_has_action(entry->irq + i));
	}

	arch_teardown_msi_irqs(dev);

	list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) {
		if (entry->msi_attrib.is_msix) {
			if (list_is_last(&entry->list, &dev->msi_list))
				iounmap(entry->mask_base);
		}

		/*
		 * It's possible to get into this path when
		 * populate_msi_sysfs() fails, which means the entries
		 * were never registered with sysfs.  In that case don't
		 * unregister them.
		 */
		if (entry->kobj.parent) {
			kobject_del(&entry->kobj);
			kobject_put(&entry->kobj);
		}

		list_del(&entry->list);
		kfree(entry);
	}
}

static struct msi_desc *alloc_msi_entry(struct pci_dev *dev)
{
	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
	if (!desc)
		return NULL;

	INIT_LIST_HEAD(&desc->list);
	desc->dev = dev;

	return desc;
}

static void pci_intx_for_msi(struct pci_dev *dev, int enable)
{
	if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG))
		pci_intx(dev, enable);
}

static void __pci_restore_msi_state(struct pci_dev *dev)
{
	int pos;
	u16 control;
	struct msi_desc *entry;

	if (!dev->msi_enabled)
		return;

	entry = irq_get_msi_desc(dev->irq);
	pos = entry->msi_attrib.pos;

	pci_intx_for_msi(dev, 0);
	msi_set_enable(dev, pos, 0);
	arch_restore_msi_irqs(dev, dev->irq);

	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
	msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
	control &= ~PCI_MSI_FLAGS_QSIZE;
	control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
}

static void __pci_restore_msix_state(struct pci_dev *dev)
{
	int pos;
	struct msi_desc *entry;
	u16 control;

	if (!dev->msix_enabled)
		return;
	BUG_ON(list_empty(&dev->msi_list));
	entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
	pos = entry->msi_attrib.pos;
	pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control);

	/* route the table */
	pci_intx_for_msi(dev, 0);
	control |= PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL;
	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);

	list_for_each_entry(entry, &dev->msi_list, list) {
		arch_restore_msi_irqs(dev, entry->irq);
		msix_mask_irq(entry, entry->masked);
	}

	control &= ~PCI_MSIX_FLAGS_MASKALL;
	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
}

void pci_restore_msi_state(struct pci_dev *dev)
{
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);

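/*
 * Note (informational): pci_restore_msi_state() runs on the PCI
 * restore/resume path, where config space contents may have been lost
 * across a power transition.  The cached msi_msg and mask state in
 * each msi_desc are what make replaying the device state possible.
 */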

#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)

struct msi_attribute {
	struct attribute        attr;
	ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
			char *buf);
	ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
			 const char *buf, size_t count);
};

static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *attr,
			     char *buf)
{
	return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi");
}

static ssize_t msi_irq_attr_show(struct kobject *kobj,
				 struct attribute *attr, char *buf)
{
	struct msi_attribute *attribute = to_msi_attr(attr);
	struct msi_desc *entry = to_msi_desc(kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(entry, attribute, buf);
}

static const struct sysfs_ops msi_irq_sysfs_ops = {
	.show = msi_irq_attr_show,
};

static struct msi_attribute mode_attribute =
	__ATTR(mode, S_IRUGO, show_msi_mode, NULL);

struct attribute *msi_irq_default_attrs[] = {
	&mode_attribute.attr,
	NULL
};

void msi_kobj_release(struct kobject *kobj)
{
	struct msi_desc *entry = to_msi_desc(kobj);

	pci_dev_put(entry->dev);
}

static struct kobj_type msi_irq_ktype = {
	.release = msi_kobj_release,
	.sysfs_ops = &msi_irq_sysfs_ops,
	.default_attrs = msi_irq_default_attrs,
};

static int populate_msi_sysfs(struct pci_dev *pdev)
{
	struct msi_desc *entry;
	struct kobject *kobj;
	int ret;
	int count = 0;

	pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
	if (!pdev->msi_kset)
		return -ENOMEM;

	list_for_each_entry(entry, &pdev->msi_list, list) {
		kobj = &entry->kobj;
		kobj->kset = pdev->msi_kset;
		pci_dev_get(pdev);
		ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
				     "%u", entry->irq);
		if (ret)
			goto out_unroll;

		count++;
	}

	return 0;

out_unroll:
	list_for_each_entry(entry, &pdev->msi_list, list) {
		if (!count)
			break;
		kobject_del(&entry->kobj);
		kobject_put(&entry->kobj);
		count--;
	}
	return ret;
}

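/*
 * Resulting layout (illustrative; device address and irq numbers made
 * up): for a function with two vectors this creates
 *
 *	/sys/bus/pci/devices/0000:00:1f.0/msi_irqs/42/mode
 *	/sys/bus/pci/devices/0000:00:1f.0/msi_irqs/43/mode
 *
 * where reading "mode" yields "msi" or "msix", see show_msi_mode()
 * above.
 */
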
/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: number of interrupts to allocate
 *
 * Set up the MSI capability structure of the device with the requested
 * number of interrupts.  A return value of zero indicates the successful
 * setup of an entry with the new MSI irq.  A negative return value indicates
 * an error, and a positive return value indicates the number of interrupts
 * which could have been allocated.
 */
static int msi_capability_init(struct pci_dev *dev, int nvec)
{
	struct msi_desc *entry;
	int pos, ret;
	u16 control;
	unsigned mask;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	msi_set_enable(dev, pos, 0);	/* Disable MSI during set up */

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	/* MSI Entry Initialization */
	entry = alloc_msi_entry(dev);
	if (!entry)
		return -ENOMEM;

	entry->msi_attrib.is_msix	= 0;
	entry->msi_attrib.is_64		= is_64bit_address(control);
	entry->msi_attrib.entry_nr	= 0;
	entry->msi_attrib.maskbit	= is_mask_bit_support(control);
	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
	entry->msi_attrib.pos		= pos;

	entry->mask_pos = msi_mask_reg(pos, entry->msi_attrib.is_64);
	/* All MSIs are unmasked by default; mask them all */
	if (entry->msi_attrib.maskbit)
		pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
	mask = msi_capable_mask(control);
	msi_mask_irq(entry, mask, mask);

	list_add_tail(&entry->list, &dev->msi_list);

	/* Configure MSI capability structure */
	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
	if (ret) {
		msi_mask_irq(entry, mask, ~mask);
		free_msi_irqs(dev);
		return ret;
	}

	ret = populate_msi_sysfs(dev);
	if (ret) {
		msi_mask_irq(entry, mask, ~mask);
		free_msi_irqs(dev);
		return ret;
	}

	/* Set MSI enabled bits */
	pci_intx_for_msi(dev, 0);
	msi_set_enable(dev, pos, 1);
	dev->msi_enabled = 1;

	dev->irq = entry->irq;
	return 0;
}

static void __iomem *msix_map_region(struct pci_dev *dev, unsigned pos,
							unsigned nr_entries)
{
	resource_size_t phys_addr;
	u32 table_offset;
	u8 bir;

	pci_read_config_dword(dev, msix_table_offset_reg(pos), &table_offset);
	bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
	table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
	phys_addr = pci_resource_start(dev, bir) + table_offset;

	return ioremap_nocache(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
}

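/*
 * Worked example (informational): if the Table Offset register reads
 * 0x00002003, the BIR field selects BAR 3 and the table starts at
 * offset 0x2000 within that BAR, so the mapping above covers
 * pci_resource_start(dev, 3) + 0x2000 for nr_entries * 16 bytes
 * (each MSI-X table entry is 16 bytes).
 */
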
static int msix_setup_entries(struct pci_dev *dev, unsigned pos,
				void __iomem *base, struct msix_entry *entries,
				int nvec)
{
	struct msi_desc *entry;
	int i;

	for (i = 0; i < nvec; i++) {
		entry = alloc_msi_entry(dev);
		if (!entry) {
			if (!i)
				iounmap(base);
			else
				free_msi_irqs(dev);
			/* Not enough memory. Don't try again. */
			return -ENOMEM;
		}

		entry->msi_attrib.is_msix	= 1;
		entry->msi_attrib.is_64		= 1;
		entry->msi_attrib.entry_nr	= entries[i].entry;
		entry->msi_attrib.default_irq	= dev->irq;
		entry->msi_attrib.pos		= pos;
		entry->mask_base		= base;

		list_add_tail(&entry->list, &dev->msi_list);
	}

	return 0;
}

static void msix_program_entries(struct pci_dev *dev,
					struct msix_entry *entries)
{
	struct msi_desc *entry;
	int i = 0;

	list_for_each_entry(entry, &dev->msi_list, list) {
		int offset = entries[i].entry * PCI_MSIX_ENTRY_SIZE +
						PCI_MSIX_ENTRY_VECTOR_CTRL;

		entries[i].vector = entry->irq;
		irq_set_msi_desc(entry->irq, entry);
		entry->masked = readl(entry->mask_base + offset);
		msix_mask_irq(entry, 1);
		i++;
	}
}

/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
 *
 * Set up the MSI-X capability structure of the device function with the
 * requested entries.  A return of zero indicates successful setup of the
 * entries with newly allocated irqs; non-zero indicates failure.
 **/
static int msix_capability_init(struct pci_dev *dev,
				struct msix_entry *entries, int nvec)
{
	int pos, ret;
	u16 control;
	void __iomem *base;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control);

	/* Ensure MSI-X is disabled while it is set up */
	control &= ~PCI_MSIX_FLAGS_ENABLE;
	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);

	/* Request & Map MSI-X table region */
	base = msix_map_region(dev, pos, multi_msix_capable(control));
	if (!base)
		return -ENOMEM;

	ret = msix_setup_entries(dev, pos, base, entries, nvec);
	if (ret)
		return ret;

	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
	if (ret)
		goto error;

	/*
	 * Some devices require MSI-X to be enabled before we can touch the
	 * MSI-X registers.  We need to mask all the vectors to prevent
	 * interrupts coming in before they're fully set up.
	 */
	control |= PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE;
	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);

	msix_program_entries(dev, entries);

	ret = populate_msi_sysfs(dev);
	if (ret) {
		ret = 0;
		goto error;
	}

	/* Set MSI-X enabled bits and unmask the function */
	pci_intx_for_msi(dev, 0);
	dev->msix_enabled = 1;

	control &= ~PCI_MSIX_FLAGS_MASKALL;
	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);

	return 0;

error:
	if (ret < 0) {
		/*
		 * If we had some success, report the number of irqs
		 * we succeeded in setting up.
		 */
		struct msi_desc *entry;
		int avail = 0;

		list_for_each_entry(entry, &dev->msi_list, list) {
			if (entry->irq != 0)
				avail++;
		}
		if (avail != 0)
			ret = avail;
	}

	free_msi_irqs(dev);

	return ret;
}

/**
 * pci_msi_check_device - check whether MSI may be enabled on a device
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: how many MSIs have been requested?
 * @type: are we checking for MSI or MSI-X?
 *
 * Look at global flags, the device itself, and its parent buses
 * to determine if MSI/-X are supported for the device. If MSI/-X is
 * supported return 0, else return an error code.
 **/
static int pci_msi_check_device(struct pci_dev *dev, int nvec, int type)
{
	struct pci_bus *bus;
	int ret;

	/* MSI must be globally enabled and supported by the device */
	if (!pci_msi_enable || !dev || dev->no_msi)
		return -EINVAL;

	/*
	 * You can't ask to have 0 or fewer MSIs configured:
	 *  a) it's stupid,
	 *  b) the list manipulation code assumes nvec >= 1.
	 */
	if (nvec < 1)
		return -ERANGE;

	/*
	 * Any bridge which does NOT route MSI transactions from its
	 * secondary bus to its primary bus must set the NO_MSI flag on
	 * the secondary pci_bus.
	 * We expect only arch-specific PCI host bus controller drivers
	 * or quirks for specific PCI bridges to set NO_MSI.
	 */
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return -EINVAL;

	ret = arch_msi_check_device(dev, nvec, type);
	if (ret)
		return ret;

	if (!pci_find_capability(dev, type))
		return -EINVAL;

	return 0;
}

/**
 * pci_enable_msi_block - configure device's MSI capability structure
 * @dev: device to configure
 * @nvec: number of interrupts to configure
 *
 * Allocate IRQs for a device with the MSI capability.
 * This function returns a negative errno if an error occurs.  If it
 * is unable to allocate the number of interrupts requested, it returns
 * the number of interrupts it might be able to allocate.  If it successfully
 * allocates at least the number of interrupts requested, it returns 0 and
 * updates the @dev's irq member to the lowest new interrupt number; the
 * other interrupt numbers allocated to this device are consecutive.
 */
int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
{
	int status, pos, maxvec;
	u16 msgctl;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!pos)
		return -EINVAL;
	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
	maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
	if (nvec > maxvec)
		return maxvec;

	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI);
	if (status)
		return status;

	WARN_ON(!!dev->msi_enabled);

	/* Check whether driver already requested MSI-X irqs */
	if (dev->msix_enabled) {
		dev_info(&dev->dev, "can't enable MSI "
			 "(MSI-X already enabled)\n");
		return -EINVAL;
	}

	status = msi_capability_init(dev, nvec);
	return status;
}
EXPORT_SYMBOL(pci_enable_msi_block);

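/*
 * Illustrative caller (a sketch, not part of this file): the retry
 * protocol described above.  A positive return from
 * pci_enable_msi_block() is the number of vectors that could be
 * allocated instead, so a driver shrinks its request until it succeeds
 * or hits a hard error.  The function name is made up.
 */
static int __maybe_unused example_enable_msi_block(struct pci_dev *pdev,
						   unsigned int nvec)
{
	int ret;

	do {
		ret = pci_enable_msi_block(pdev, nvec);
		if (ret > 0)
			nvec = ret;	/* retry with the suggested count */
	} while (ret > 0);

	return ret;	/* 0 on success, negative errno on failure */
}
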
void pci_msi_shutdown(struct pci_dev *dev)
{
	struct msi_desc *desc;
	u32 mask;
	u16 ctrl;
	unsigned pos;

	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	BUG_ON(list_empty(&dev->msi_list));
	desc = list_first_entry(&dev->msi_list, struct msi_desc, list);
	pos = desc->msi_attrib.pos;

	msi_set_enable(dev, pos, 0);
	pci_intx_for_msi(dev, 1);
	dev->msi_enabled = 0;

	/* Return the device to its initial state, with MSI unmasked */
	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &ctrl);
	mask = msi_capable_mask(ctrl);
	/* Keep cached state to be restored */
	__msi_mask_irq(desc, mask, ~mask);

	/* Restore dev->irq to its default pin-assertion irq */
	dev->irq = desc->msi_attrib.default_irq;
}

void pci_disable_msi(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	pci_msi_shutdown(dev);
	free_msi_irqs(dev);
	kset_unregister(dev->msi_kset);
	dev->msi_kset = NULL;
}
EXPORT_SYMBOL(pci_disable_msi);

/**
 * pci_msix_table_size - return the number of device's MSI-X table entries
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 */
int pci_msix_table_size(struct pci_dev *dev)
{
	int pos;
	u16 control;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (!pos)
		return 0;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	return multi_msix_capable(control);
}

/**
 * pci_enable_msix - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of MSI-X entries
 * @nvec: number of MSI-X irqs requested for allocation by device driver
 *
 * Set up the MSI-X capability structure of the device function with the
 * number of irqs requested by its driver.  A return of zero indicates
 * that the MSI-X capability structure was successfully configured with
 * newly allocated irqs; a return of < 0 indicates a failure.  A return
 * of > 0 indicates that the request exceeds the number of irqs or MSI-X
 * vectors available; the driver should retry with the returned value.
 **/
int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
{
	int status, nr_entries;
	int i, j;

	if (!entries)
		return -EINVAL;

	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSIX);
	if (status)
		return status;

	nr_entries = pci_msix_table_size(dev);
	if (nvec > nr_entries)
		return nr_entries;

	/* Check for any invalid entries */
	for (i = 0; i < nvec; i++) {
		if (entries[i].entry >= nr_entries)
			return -EINVAL;		/* invalid entry */
		for (j = i + 1; j < nvec; j++) {
			if (entries[i].entry == entries[j].entry)
				return -EINVAL;	/* duplicate entry */
		}
	}
	WARN_ON(!!dev->msix_enabled);

	/* Check whether driver already requested for MSI irq */
	if (dev->msi_enabled) {
		dev_info(&dev->dev, "can't enable MSI-X "
		       "(MSI IRQ already assigned)\n");
		return -EINVAL;
	}
	status = msix_capability_init(dev, entries, nvec);
	return status;
}
EXPORT_SYMBOL(pci_enable_msix);

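/*
 * Illustrative caller (a sketch, not part of this file): the retry
 * protocol for pci_enable_msix().  A positive return is the number of
 * vectors actually available, so the caller retries with that many
 * entries; the entries[] array must already hold at least nvec elements
 * with the desired table indices filled in.  The function name is made
 * up.
 */
static int __maybe_unused example_enable_msix(struct pci_dev *pdev,
					      struct msix_entry *entries,
					      int nvec)
{
	int ret;

	do {
		ret = pci_enable_msix(pdev, entries, nvec);
		if (ret > 0)
			nvec = ret;	/* fewer vectors than requested */
	} while (ret > 0);

	return ret;	/* 0 on success, negative errno on failure */
}
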
void pci_msix_shutdown(struct pci_dev *dev)
{
	struct msi_desc *entry;

	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	/* Return the device to its initial state, with MSI-X masked */
	list_for_each_entry(entry, &dev->msi_list, list) {
		/* Keep cached states to be restored */
		__msix_mask_irq(entry, 1);
	}

	msix_set_enable(dev, 0);
	pci_intx_for_msi(dev, 1);
	dev->msix_enabled = 0;
}

void pci_disable_msix(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	pci_msix_shutdown(dev);
	free_msi_irqs(dev);
	kset_unregister(dev->msi_kset);
	dev->msi_kset = NULL;
}
EXPORT_SYMBOL(pci_disable_msix);

/**
 * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
 * @dev: pointer to the pci_dev data structure of MSI(X) device function
 *
 * Called during hotplug removal, when the device function is being
 * hot-removed.  All MSI/MSI-X irqs previously allocated for this device
 * function are reclaimed to the unused state, from which they may be
 * used again later.
 **/
void msi_remove_pci_irq_vectors(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev)
		return;

	if (dev->msi_enabled || dev->msix_enabled)
		free_msi_irqs(dev);
}

void pci_no_msi(void)
{
	pci_msi_enable = 0;
}

/**
 * pci_msi_enabled - is MSI enabled?
 *
 * Returns true if MSI has not been disabled by the command-line option
 * pci=nomsi.
 **/
int pci_msi_enabled(void)
{
	return pci_msi_enable;
}
EXPORT_SYMBOL(pci_msi_enabled);

void pci_msi_init_pci_dev(struct pci_dev *dev)
{
	int pos;
	INIT_LIST_HEAD(&dev->msi_list);

	/* Disable the MSI hardware to avoid screaming interrupts
	 * during boot.  This is the power-on reset default, so
	 * usually this should be a no-op.
	 */
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (pos)
		msi_set_enable(dev, pos, 0);
	msix_set_enable(dev, 0);
}