msi.c revision 1f80025e624bb14fefadfef7e80fbfb9740d4714
1/*
2 * File:	msi.c
3 * Purpose:	PCI Message Signaled Interrupt (MSI)
4 *
5 * Copyright (C) 2003-2004 Intel
6 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
7 */
8
9#include <linux/err.h>
10#include <linux/mm.h>
11#include <linux/irq.h>
12#include <linux/interrupt.h>
13#include <linux/init.h>
14#include <linux/ioport.h>
15#include <linux/smp_lock.h>
16#include <linux/pci.h>
17#include <linux/proc_fs.h>
18
19#include <asm/errno.h>
20#include <asm/io.h>
21#include <asm/smp.h>
22
23#include "pci.h"
24#include "msi.h"
25
26static DEFINE_SPINLOCK(msi_lock);
27static struct msi_desc* msi_desc[NR_IRQS] = { [0 ... NR_IRQS-1] = NULL };
28static kmem_cache_t* msi_cachep;
29
30static int pci_msi_enable = 1;
31
32static struct msi_ops *msi_ops;
33
34int
35msi_register(struct msi_ops *ops)
36{
37	msi_ops = ops;
38	return 0;
39}
40
41static int msi_cache_init(void)
42{
43	msi_cachep = kmem_cache_create("msi_cache", sizeof(struct msi_desc),
44					0, SLAB_HWCACHE_ALIGN, NULL, NULL);
45	if (!msi_cachep)
46		return -ENOMEM;
47
48	return 0;
49}
50
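/*
 * Per-vector masking: for plain MSI the optional mask bits live in the
 * capability in config space, and mask_base holds that config offset cast
 * to a pointer.  For MSI-X, mask_base is the ioremap()ed vector table and
 * each vector is masked through bit 0 of its Vector Control dword.
 */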
51static void msi_set_mask_bit(unsigned int irq, int flag)
52{
53	struct msi_desc *entry;
54
55	entry = msi_desc[irq];
56	if (!entry || !entry->dev || !entry->mask_base)
57		return;
58	switch (entry->msi_attrib.type) {
59	case PCI_CAP_ID_MSI:
60	{
61		int		pos;
62		u32		mask_bits;
63
64		pos = (long)entry->mask_base;
65		pci_read_config_dword(entry->dev, pos, &mask_bits);
66		mask_bits &= ~(1);
67		mask_bits |= flag;
68		pci_write_config_dword(entry->dev, pos, mask_bits);
69		break;
70	}
71	case PCI_CAP_ID_MSIX:
72	{
73		int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
74			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
75		writel(flag, entry->mask_base + offset);
76		break;
77	}
78	default:
79		break;
80	}
81}
82
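/*
 * Message access: an MSI message (address/data pair) is kept in the
 * capability registers in config space, using the 32-bit or 64-bit layout
 * indicated by msi_attrib.is_64.  An MSI-X message lives in the device's
 * memory-mapped vector table, one PCI_MSIX_ENTRY_SIZE slot per vector.
 */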
83static void read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
84{
85	switch(entry->msi_attrib.type) {
86	case PCI_CAP_ID_MSI:
87	{
88		struct pci_dev *dev = entry->dev;
89		int pos = entry->msi_attrib.pos;
90		u16 data;
91
92		pci_read_config_dword(dev, msi_lower_address_reg(pos),
93					&msg->address_lo);
94		if (entry->msi_attrib.is_64) {
95			pci_read_config_dword(dev, msi_upper_address_reg(pos),
96						&msg->address_hi);
97			pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
98		} else {
99			msg->address_hi = 0;
100			pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
101		}
102		msg->data = data;
103		break;
104	}
105	case PCI_CAP_ID_MSIX:
106	{
107		void __iomem *base;
108		base = entry->mask_base +
109			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
110
111		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
112		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
113		msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
114 		break;
115 	}
116 	default:
117		BUG();
118	}
119}
120
121static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
122{
123	switch (entry->msi_attrib.type) {
124	case PCI_CAP_ID_MSI:
125	{
126		struct pci_dev *dev = entry->dev;
127		int pos = entry->msi_attrib.pos;
128
129		pci_write_config_dword(dev, msi_lower_address_reg(pos),
130					msg->address_lo);
131		if (entry->msi_attrib.is_64) {
132			pci_write_config_dword(dev, msi_upper_address_reg(pos),
133						msg->address_hi);
134			pci_write_config_word(dev, msi_data_reg(pos, 1),
135						msg->data);
136		} else {
137			pci_write_config_word(dev, msi_data_reg(pos, 0),
138						msg->data);
139		}
140		break;
141	}
142	case PCI_CAP_ID_MSIX:
143	{
144		void __iomem *base;
145		base = entry->mask_base +
146			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
147
148		writel(msg->address_lo,
149			base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
150		writel(msg->address_hi,
151			base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
152		writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
153		break;
154	}
155	default:
156		BUG();
157	}
158}
159
160#ifdef CONFIG_SMP
161static void set_msi_affinity(unsigned int irq, cpumask_t cpu_mask)
162{
163	struct msi_desc *entry;
164	struct msi_msg msg;
165
166	entry = msi_desc[irq];
167	if (!entry || !entry->dev)
168		return;
169
170	read_msi_msg(entry, &msg);
171	msi_ops->target(irq, cpu_mask, &msg);
172	write_msi_msg(entry, &msg);
173	set_native_irq_info(irq, cpu_mask);
174}
175#else
176#define set_msi_affinity NULL
177#endif /* CONFIG_SMP */
178
179static void mask_MSI_irq(unsigned int irq)
180{
181	msi_set_mask_bit(irq, 1);
182}
183
184static void unmask_MSI_irq(unsigned int irq)
185{
186	msi_set_mask_bit(irq, 0);
187}
188
189static unsigned int startup_msi_irq_wo_maskbit(unsigned int irq)
190{
191	return 0;	/* never anything pending */
192}
193
194static unsigned int startup_msi_irq_w_maskbit(unsigned int irq)
195{
196	startup_msi_irq_wo_maskbit(irq);
197	unmask_MSI_irq(irq);
198	return 0;	/* never anything pending */
199}
200
201static void shutdown_msi_irq(unsigned int irq)
202{
203}
204
205static void end_msi_irq_wo_maskbit(unsigned int irq)
206{
207	move_native_irq(irq);
208	ack_APIC_irq();
209}
210
211static void end_msi_irq_w_maskbit(unsigned int irq)
212{
213	move_native_irq(irq);
214	unmask_MSI_irq(irq);
215	ack_APIC_irq();
216}
217
218static void do_nothing(unsigned int irq)
219{
220}
221
222/*
223 * Interrupt Type for MSI-X PCI/PCI-X/PCI-Express Devices,
224 * which implement the MSI-X Capability Structure.
225 */
226static struct hw_interrupt_type msix_irq_type = {
227	.typename	= "PCI-MSI-X",
228	.startup	= startup_msi_irq_w_maskbit,
229	.shutdown	= shutdown_msi_irq,
230	.enable		= unmask_MSI_irq,
231	.disable	= mask_MSI_irq,
232	.ack		= mask_MSI_irq,
233	.end		= end_msi_irq_w_maskbit,
234	.set_affinity	= set_msi_affinity
235};
236
237/*
238 * Interrupt Type for MSI PCI/PCI-X/PCI-Express Devices,
239 * which implement the MSI Capability Structure with
240 * Mask-and-Pending Bits.
241 */
242static struct hw_interrupt_type msi_irq_w_maskbit_type = {
243	.typename	= "PCI-MSI",
244	.startup	= startup_msi_irq_w_maskbit,
245	.shutdown	= shutdown_msi_irq,
246	.enable		= unmask_MSI_irq,
247	.disable	= mask_MSI_irq,
248	.ack		= mask_MSI_irq,
249	.end		= end_msi_irq_w_maskbit,
250	.set_affinity	= set_msi_affinity
251};
252
253/*
254 * Interrupt Type for MSI PCI/PCI-X/PCI-Express Devices,
255 * which implement the MSI Capability Structure without
256 * Mask-and-Pending Bits.
257 */
258static struct hw_interrupt_type msi_irq_wo_maskbit_type = {
259	.typename	= "PCI-MSI",
260	.startup	= startup_msi_irq_wo_maskbit,
261	.shutdown	= shutdown_msi_irq,
262	.enable		= do_nothing,
263	.disable	= do_nothing,
264	.ack		= do_nothing,
265	.end		= end_msi_irq_wo_maskbit,
266	.set_affinity	= set_msi_affinity
267};
268
269static int msi_free_irq(struct pci_dev* dev, int irq);
270static int msi_init(void)
271{
272	static int status = -ENOMEM;
273
274	if (!status)
275		return status;
276
277	if (pci_msi_quirk) {
278		pci_msi_enable = 0;
279		printk(KERN_WARNING "PCI: MSI quirk detected. MSI disabled.\n");
280		status = -EINVAL;
281		return status;
282	}
283
284	status = msi_arch_init();
285	if (status < 0) {
286		pci_msi_enable = 0;
287		printk(KERN_WARNING
288		       "PCI: MSI arch init failed.  MSI disabled.\n");
289		return status;
290	}
291
292	if (! msi_ops) {
293		pci_msi_enable = 0;
294		printk(KERN_WARNING
295		       "PCI: MSI ops not registered. MSI disabled.\n");
296		status = -EINVAL;
297		return status;
298	}
299
300	status = msi_cache_init();
301	if (status < 0) {
302		pci_msi_enable = 0;
303		printk(KERN_WARNING "PCI: MSI cache init failed\n");
304		return status;
305	}
306
307	return status;
308}
309
310static struct msi_desc* alloc_msi_entry(void)
311{
312	struct msi_desc *entry;
313
314	entry = kmem_cache_zalloc(msi_cachep, GFP_KERNEL);
315	if (!entry)
316		return NULL;
317
318	entry->link.tail = entry->link.head = 0;	/* single message */
319	entry->dev = NULL;
320
321	return entry;
322}
323
324static void attach_msi_entry(struct msi_desc *entry, int irq)
325{
326	unsigned long flags;
327
328	spin_lock_irqsave(&msi_lock, flags);
329	msi_desc[irq] = entry;
330	spin_unlock_irqrestore(&msi_lock, flags);
331}
332
333static int create_msi_irq(struct hw_interrupt_type *handler)
334{
335	struct msi_desc *entry;
336	int irq;
337
338	entry = alloc_msi_entry();
339	if (!entry)
340		return -ENOMEM;
341
342	irq = create_irq();
343	if (irq < 0) {
344		kmem_cache_free(msi_cachep, entry);
345		return -EBUSY;
346	}
347
348	set_irq_chip(irq, handler);
349	set_irq_data(irq, entry);
350
351	return irq;
352}
353
354static void destroy_msi_irq(unsigned int irq)
355{
356	struct msi_desc *entry;
357
358	entry = get_irq_data(irq);
359	set_irq_chip(irq, NULL);
360	set_irq_data(irq, NULL);
361	destroy_irq(irq);
362	kmem_cache_free(msi_cachep, entry);
363}
364
365static void enable_msi_mode(struct pci_dev *dev, int pos, int type)
366{
367	u16 control;
368
369	pci_read_config_word(dev, msi_control_reg(pos), &control);
370	if (type == PCI_CAP_ID_MSI) {
371		/* Set enabled bits to single MSI & enable MSI_enable bit */
372		msi_enable(control, 1);
373		pci_write_config_word(dev, msi_control_reg(pos), control);
374		dev->msi_enabled = 1;
375	} else {
376		msix_enable(control);
377		pci_write_config_word(dev, msi_control_reg(pos), control);
378		dev->msix_enabled = 1;
379	}
380    	if (pci_find_capability(dev, PCI_CAP_ID_EXP)) {
381		/* PCI Express Endpoint device detected */
382		pci_intx(dev, 0);  /* disable intx */
383	}
384}
385
386void disable_msi_mode(struct pci_dev *dev, int pos, int type)
387{
388	u16 control;
389
390	pci_read_config_word(dev, msi_control_reg(pos), &control);
391	if (type == PCI_CAP_ID_MSI) {
392		/* Clear the MSI_enable bit */
393		msi_disable(control);
394		pci_write_config_word(dev, msi_control_reg(pos), control);
395		dev->msi_enabled = 0;
396	} else {
397		msix_disable(control);
398		pci_write_config_word(dev, msi_control_reg(pos), control);
399		dev->msix_enabled = 0;
400	}
401    	if (pci_find_capability(dev, PCI_CAP_ID_EXP)) {
402		/* PCI Express Endpoint device detected */
403		pci_intx(dev, 1);  /* enable intx */
404	}
405}
406
407static int msi_lookup_irq(struct pci_dev *dev, int type)
408{
409	int irq;
410	unsigned long flags;
411
412	spin_lock_irqsave(&msi_lock, flags);
413	for (irq = 0; irq < NR_IRQS; irq++) {
414		if (!msi_desc[irq] || msi_desc[irq]->dev != dev ||
415			msi_desc[irq]->msi_attrib.type != type ||
416			msi_desc[irq]->msi_attrib.default_irq != dev->irq)
417			continue;
418		spin_unlock_irqrestore(&msi_lock, flags);
419		/* This pre-assigned MSI irq for this device
420		   already exists. Override dev->irq with this irq */
421		dev->irq = irq;
422		return 0;
423	}
424	spin_unlock_irqrestore(&msi_lock, flags);
425
426	return -EACCES;
427}
428
429void pci_scan_msi_device(struct pci_dev *dev)
430{
431	if (!dev)
432		return;
433}
434
435#ifdef CONFIG_PM
436int pci_save_msi_state(struct pci_dev *dev)
437{
438	int pos, i = 0;
439	u16 control;
440	struct pci_cap_saved_state *save_state;
441	u32 *cap;
442
443	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
444	if (pos <= 0 || dev->no_msi)
445		return 0;
446
447	pci_read_config_word(dev, msi_control_reg(pos), &control);
448	if (!(control & PCI_MSI_FLAGS_ENABLE))
449		return 0;
450
451	save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u32) * 5,
452		GFP_KERNEL);
453	if (!save_state) {
454		printk(KERN_ERR "Out of memory in pci_save_msi_state\n");
455		return -ENOMEM;
456	}
457	cap = &save_state->data[0];
458
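	/*
	 * Saved layout: cap[0] is the capability dword (ID/next/control),
	 * cap[1] the low address, then the high address if the device is
	 * 64-bit capable, the data register, and finally the mask bits if
	 * the device implements per-vector masking.
	 */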
459	pci_read_config_dword(dev, pos, &cap[i++]);
460	control = cap[0] >> 16;
461	pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, &cap[i++]);
462	if (control & PCI_MSI_FLAGS_64BIT) {
463		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, &cap[i++]);
464		pci_read_config_dword(dev, pos + PCI_MSI_DATA_64, &cap[i++]);
465	} else
466		pci_read_config_dword(dev, pos + PCI_MSI_DATA_32, &cap[i++]);
467	if (control & PCI_MSI_FLAGS_MASKBIT)
468		pci_read_config_dword(dev, pos + PCI_MSI_MASK_BIT, &cap[i++]);
469	save_state->cap_nr = PCI_CAP_ID_MSI;
470	pci_add_saved_cap(dev, save_state);
471	return 0;
472}
473
474void pci_restore_msi_state(struct pci_dev *dev)
475{
476	int i = 0, pos;
477	u16 control;
478	struct pci_cap_saved_state *save_state;
479	u32 *cap;
480
481	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_MSI);
482	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
483	if (!save_state || pos <= 0)
484		return;
485	cap = &save_state->data[0];
486
487	control = cap[i++] >> 16;
488	pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, cap[i++]);
489	if (control & PCI_MSI_FLAGS_64BIT) {
490		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, cap[i++]);
491		pci_write_config_dword(dev, pos + PCI_MSI_DATA_64, cap[i++]);
492	} else
493		pci_write_config_dword(dev, pos + PCI_MSI_DATA_32, cap[i++]);
494	if (control & PCI_MSI_FLAGS_MASKBIT)
495		pci_write_config_dword(dev, pos + PCI_MSI_MASK_BIT, cap[i++]);
496	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
497	enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
498	pci_remove_saved_cap(save_state);
499	kfree(save_state);
500}
501
502int pci_save_msix_state(struct pci_dev *dev)
503{
504	int pos;
505	int temp;
506	int irq, head, tail = 0;
507	u16 control;
508	struct pci_cap_saved_state *save_state;
509
510	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
511	if (pos <= 0 || dev->no_msi)
512		return 0;
513
514	/* save the capability */
515	pci_read_config_word(dev, msi_control_reg(pos), &control);
516	if (!(control & PCI_MSIX_FLAGS_ENABLE))
517		return 0;
518	save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u16),
519		GFP_KERNEL);
520	if (!save_state) {
521		printk(KERN_ERR "Out of memory in pci_save_msix_state\n");
522		return -ENOMEM;
523	}
524	*((u16 *)&save_state->data[0]) = control;
525
526	/* save the table */
527	temp = dev->irq;
528	if (msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
529		kfree(save_state);
530		return -EINVAL;
531	}
532
533	irq = head = dev->irq;
534	while (head != tail) {
535		struct msi_desc *entry;
536
537		entry = msi_desc[irq];
538		read_msi_msg(entry, &entry->msg_save);
539
540		tail = msi_desc[irq]->link.tail;
541		irq = tail;
542	}
543	dev->irq = temp;
544
545	save_state->cap_nr = PCI_CAP_ID_MSIX;
546	pci_add_saved_cap(dev, save_state);
547	return 0;
548}
549
550void pci_restore_msix_state(struct pci_dev *dev)
551{
552	u16 save;
553	int pos;
554	int irq, head, tail = 0;
555	struct msi_desc *entry;
556	int temp;
557	struct pci_cap_saved_state *save_state;
558
559	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_MSIX);
560	if (!save_state)
561		return;
562	save = *((u16 *)&save_state->data[0]);
563	pci_remove_saved_cap(save_state);
564	kfree(save_state);
565
566	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
567	if (pos <= 0)
568		return;
569
570	/* route the table */
571	temp = dev->irq;
572	if (msi_lookup_irq(dev, PCI_CAP_ID_MSIX))
573		return;
574	irq = head = dev->irq;
575	while (head != tail) {
576		entry = msi_desc[irq];
577		write_msi_msg(entry, &entry->msg_save);
578
579		tail = msi_desc[irq]->link.tail;
580		irq = tail;
581	}
582	dev->irq = temp;
583
584	pci_write_config_word(dev, msi_control_reg(pos), save);
585	enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
586}
587#endif
588
589static int msi_register_init(struct pci_dev *dev, struct msi_desc *entry)
590{
591	int status;
592	struct msi_msg msg;
593	int pos;
594	u16 control;
595
596	pos = entry->msi_attrib.pos;
597	pci_read_config_word(dev, msi_control_reg(pos), &control);
598
599	/* Configure MSI capability structure */
600	status = msi_ops->setup(dev, dev->irq, &msg);
601	if (status < 0)
602		return status;
603
604	write_msi_msg(entry, &msg);
605	if (entry->msi_attrib.maskbit) {
606		unsigned int maskbits, temp;
607		/* All MSIs are unmasked by default; mask them all */
608		pci_read_config_dword(dev,
609			msi_mask_bits_reg(pos, is_64bit_address(control)),
610			&maskbits);
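		/*
		 * temp gets one bit per message the function may send
		 * (multi_msi_capable() reports that count), so OR-ing it
		 * into the register masks every possible vector.
		 */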
611		temp = (1 << multi_msi_capable(control));
612		temp = ((temp - 1) & ~temp);
613		maskbits |= temp;
614		pci_write_config_dword(dev,
615			msi_mask_bits_reg(pos, is_64bit_address(control)),
616			maskbits);
617	}
618
619	return 0;
620}
621
622/**
623 * msi_capability_init - configure device's MSI capability structure
624 * @dev: pointer to the pci_dev data structure of MSI device function
625 *
626 * Setup the MSI capability structure of the device function with a single
627 * MSI irq, regardless of whether the device function is capable of
628 * handling multiple messages. A return of zero indicates successful setup
629 * of entry zero with the new MSI irq; a non-zero return indicates failure.
630 **/
631static int msi_capability_init(struct pci_dev *dev)
632{
633	int status;
634	struct msi_desc *entry;
635	int pos, irq;
636	u16 control;
637	struct hw_interrupt_type *handler;
638
639   	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
640	pci_read_config_word(dev, msi_control_reg(pos), &control);
641	/* MSI Entry Initialization */
642	handler = &msi_irq_wo_maskbit_type;
643	if (is_mask_bit_support(control))
644		handler = &msi_irq_w_maskbit_type;
645
646	irq = create_msi_irq(handler);
647	if (irq < 0)
648		return irq;
649
650	entry = get_irq_data(irq);
651	entry->link.head = irq;
652	entry->link.tail = irq;
653	entry->msi_attrib.type = PCI_CAP_ID_MSI;
654	entry->msi_attrib.is_64 = is_64bit_address(control);
655	entry->msi_attrib.entry_nr = 0;
656	entry->msi_attrib.maskbit = is_mask_bit_support(control);
657	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
658	entry->msi_attrib.pos = pos;
659	dev->irq = irq;
660	entry->dev = dev;
661	if (is_mask_bit_support(control)) {
662		entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
663				is_64bit_address(control));
664	}
665	/* Configure MSI capability structure */
666	status = msi_register_init(dev, entry);
667	if (status != 0) {
668		dev->irq = entry->msi_attrib.default_irq;
669		destroy_msi_irq(irq);
670		return status;
671	}
672
673	attach_msi_entry(entry, irq);
674	/* Set MSI enabled bits	 */
675	enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
676
677	return 0;
678}
679
680/**
681 * msix_capability_init - configure device's MSI-X capability
682 * @dev: pointer to the pci_dev data structure of MSI-X device function
683 * @entries: pointer to an array of struct msix_entry entries
684 * @nvec: number of @entries
685 *
686 * Setup the MSI-X capability structure of the device function with the
687 * requested number of MSI-X irqs. A return of zero indicates successful
688 * setup of the requested MSI-X entries with allocated irqs; non-zero otherwise.
689 **/
690static int msix_capability_init(struct pci_dev *dev,
691				struct msix_entry *entries, int nvec)
692{
693	struct msi_desc *head = NULL, *tail = NULL, *entry = NULL;
694	struct msi_msg msg;
695	int status;
696	int irq, pos, i, j, nr_entries, temp = 0;
697	unsigned long phys_addr;
698	u32 table_offset;
699 	u16 control;
700	u8 bir;
701	void __iomem *base;
702
703   	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
704	/* Request & Map MSI-X table region */
705 	pci_read_config_word(dev, msi_control_reg(pos), &control);
706	nr_entries = multi_msix_capable(control);
707
708 	pci_read_config_dword(dev, msix_table_offset_reg(pos), &table_offset);
709	bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
710	table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
711	phys_addr = pci_resource_start (dev, bir) + table_offset;
712	base = ioremap_nocache(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
713	if (base == NULL)
714		return -ENOMEM;
715
716	/* MSI-X Table Initialization */
717	for (i = 0; i < nvec; i++) {
718		irq = create_msi_irq(&msix_irq_type);
719		if (irq < 0)
720			break;
721
722		entry = get_irq_data(irq);
723 		j = entries[i].entry;
724 		entries[i].vector = irq;
725		entry->msi_attrib.type = PCI_CAP_ID_MSIX;
726		entry->msi_attrib.is_64 = 1;
727		entry->msi_attrib.entry_nr = j;
728		entry->msi_attrib.maskbit = 1;
729		entry->msi_attrib.default_irq = dev->irq;
730		entry->msi_attrib.pos = pos;
731		entry->dev = dev;
732		entry->mask_base = base;
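		/*
		 * Link the descriptor into the per-device circular list:
		 * link.tail points at the next vector and link.head at the
		 * previous one, so teardown can walk link.tail from the
		 * first vector until it wraps back around.
		 */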
733		if (!head) {
734			entry->link.head = irq;
735			entry->link.tail = irq;
736			head = entry;
737		} else {
738			entry->link.head = temp;
739			entry->link.tail = tail->link.tail;
740			tail->link.tail = irq;
741			head->link.head = irq;
742		}
743		temp = irq;
744		tail = entry;
745		/* Configure MSI-X capability structure */
746		status = msi_ops->setup(dev, irq, &msg);
747		if (status < 0) {
748			destroy_msi_irq(irq);
749			break;
750		}
751
752		write_msi_msg(entry, &msg);
753		attach_msi_entry(entry, irq);
754	}
755	if (i != nvec) {
756		int avail = i - 1;
757		i--;
758		for (; i >= 0; i--) {
759			irq = (entries + i)->vector;
760			msi_free_irq(dev, irq);
761			(entries + i)->vector = 0;
762		}
763		/* If we had some success, report the number of irqs
764		 * we succeeded in setting up.
765		 */
766		if (avail <= 0)
767			avail = -EBUSY;
768		return avail;
769	}
770	/* Set MSI-X enabled bits */
771	enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
772
773	return 0;
774}
775
776/**
777 * pci_msi_supported - check whether MSI may be enabled on device
778 * @dev: pointer to the pci_dev data structure of MSI device function
779 *
780 * MSI must be globally enabled and supported by the device and its root
781 * bus. But, the root bus is not easy to find since some architectures
782 * have virtual busses on top of the PCI hierarchy (for instance the
783 * hypertransport bus), while the actual bus where MSI must be supported
784 * is below. So we test the MSI flag on all parent busses and assume
785 * that no quirk will ever set the NO_MSI flag on a non-root bus.
786 **/
787static
788int pci_msi_supported(struct pci_dev * dev)
789{
790	struct pci_bus *bus;
791
792	if (!pci_msi_enable || !dev || dev->no_msi)
793		return -EINVAL;
794
795	/* check MSI flags of all parent busses */
796	for (bus = dev->bus; bus; bus = bus->parent)
797		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
798			return -EINVAL;
799
800	return 0;
801}
802
803/**
804 * pci_enable_msi - configure device's MSI capability structure
805 * @dev: pointer to the pci_dev data structure of MSI device function
806 *
807 * Setup the MSI capability structure of the device function with
808 * a single MSI irq when its software driver requests that MSI mode
809 * be enabled on the hardware device function. A return of zero
810 * indicates successful setup of entry zero with the new MSI
811 * irq; a non-zero return indicates failure.
812 **/
813int pci_enable_msi(struct pci_dev* dev)
814{
815	int pos, temp, status;
816	u16 control;
817
818	if (pci_msi_supported(dev) < 0)
819		return -EINVAL;
820
821	temp = dev->irq;
822
823	status = msi_init();
824	if (status < 0)
825		return status;
826
827	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
828	if (!pos)
829		return -EINVAL;
830
831	pci_read_config_word(dev, msi_control_reg(pos), &control);
832	if (!is_64bit_address(control) && msi_ops->needs_64bit_address)
833		return -EINVAL;
834
835	WARN_ON(!msi_lookup_irq(dev, PCI_CAP_ID_MSI));
836
837	/* Check whether the driver already requested MSI-X irqs */
838	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
839	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
840			printk(KERN_INFO "PCI: %s: Can't enable MSI.  "
841			       "Device already has MSI-X irq assigned\n",
842			       pci_name(dev));
843			dev->irq = temp;
844			return -EINVAL;
845	}
846	status = msi_capability_init(dev);
847	return status;
848}
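
/*
 * A minimal usage sketch (illustrative only; the foo_* driver names and
 * handler are hypothetical and error handling is trimmed): the driver
 * enables MSI before requesting the irq and tears everything down in the
 * reverse order, since pci_disable_msi() expects the irq to be free.
 *
 *	static int foo_probe(struct pci_dev *pdev,
 *			     const struct pci_device_id *id)
 *	{
 *		int rc;
 *
 *		rc = pci_enable_device(pdev);
 *		if (rc)
 *			return rc;
 *
 *		if (pci_enable_msi(pdev))
 *			printk(KERN_INFO "foo: %s: no MSI, using INTx\n",
 *			       pci_name(pdev));
 *
 *		rc = request_irq(pdev->irq, foo_interrupt, IRQF_SHARED,
 *				 "foo", pdev);
 *		if (rc) {
 *			pci_disable_msi(pdev);
 *			pci_disable_device(pdev);
 *		}
 *		return rc;
 *	}
 *
 *	static void foo_remove(struct pci_dev *pdev)
 *	{
 *		free_irq(pdev->irq, pdev);
 *		pci_disable_msi(pdev);
 *		pci_disable_device(pdev);
 *	}
 */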
849
850void pci_disable_msi(struct pci_dev* dev)
851{
852	struct msi_desc *entry;
853	int pos, default_irq;
854	u16 control;
855	unsigned long flags;
856
857	if (!pci_msi_enable)
858		return;
859	if (!dev)
860		return;
861
862	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
863	if (!pos)
864		return;
865
866	pci_read_config_word(dev, msi_control_reg(pos), &control);
867	if (!(control & PCI_MSI_FLAGS_ENABLE))
868		return;
869
870	disable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
871
872	spin_lock_irqsave(&msi_lock, flags);
873	entry = msi_desc[dev->irq];
874	if (!entry || !entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) {
875		spin_unlock_irqrestore(&msi_lock, flags);
876		return;
877	}
878	if (irq_has_action(dev->irq)) {
879		spin_unlock_irqrestore(&msi_lock, flags);
880		printk(KERN_WARNING "PCI: %s: pci_disable_msi() called without "
881		       "free_irq() on MSI irq %d\n",
882		       pci_name(dev), dev->irq);
883		BUG_ON(irq_has_action(dev->irq));
884	} else {
885		default_irq = entry->msi_attrib.default_irq;
886		spin_unlock_irqrestore(&msi_lock, flags);
887		msi_free_irq(dev, dev->irq);
888
889		/* Restore dev->irq to its default pin-assertion irq */
890		dev->irq = default_irq;
891	}
892}
893
894static int msi_free_irq(struct pci_dev* dev, int irq)
895{
896	struct msi_desc *entry;
897	int head, entry_nr, type;
898	void __iomem *base;
899	unsigned long flags;
900
901	msi_ops->teardown(irq);
902
903	spin_lock_irqsave(&msi_lock, flags);
904	entry = msi_desc[irq];
905	if (!entry || entry->dev != dev) {
906		spin_unlock_irqrestore(&msi_lock, flags);
907		return -EINVAL;
908	}
909	type = entry->msi_attrib.type;
910	entry_nr = entry->msi_attrib.entry_nr;
911	head = entry->link.head;
912	base = entry->mask_base;
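	/* Unlink this descriptor from the per-device circular list */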
913	msi_desc[entry->link.head]->link.tail = entry->link.tail;
914	msi_desc[entry->link.tail]->link.head = entry->link.head;
915	entry->dev = NULL;
916	msi_desc[irq] = NULL;
917	spin_unlock_irqrestore(&msi_lock, flags);
918
919	destroy_msi_irq(irq);
920
921	if (type == PCI_CAP_ID_MSIX) {
922		writel(1, base + entry_nr * PCI_MSIX_ENTRY_SIZE +
923			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
924
925		if (head == irq)
926			iounmap(base);
927	}
928
929	return 0;
930}
931
932/**
933 * pci_enable_msix - configure device's MSI-X capability structure
934 * @dev: pointer to the pci_dev data structure of MSI-X device function
935 * @entries: pointer to an array of MSI-X entries
936 * @nvec: number of MSI-X irqs requested for allocation by device driver
937 *
938 * Setup the MSI-X capability structure of the device function with the
939 * number of requested irqs when its software driver requests that MSI-X
940 * mode be enabled on the hardware device function. A return of zero
941 * indicates successful configuration of the MSI-X capability structure
942 * with the newly allocated MSI-X irqs. A return of < 0 indicates a failure.
943 * A return of > 0 indicates that the driver requested more irqs than are
944 * available; the driver should use the returned value to re-send its
945 * request with fewer entries.
946 **/
947int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
948{
949	int status, pos, nr_entries;
950	int i, j, temp;
951	u16 control;
952
953	if (!entries || pci_msi_supported(dev) < 0)
954 		return -EINVAL;
955
956	status = msi_init();
957	if (status < 0)
958		return status;
959
960	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
961	if (!pos)
962 		return -EINVAL;
963
964	pci_read_config_word(dev, msi_control_reg(pos), &control);
965	nr_entries = multi_msix_capable(control);
966	if (nvec > nr_entries)
967		return -EINVAL;
968
969	/* Check for any invalid entries */
970	for (i = 0; i < nvec; i++) {
971		if (entries[i].entry >= nr_entries)
972			return -EINVAL;		/* invalid entry */
973		for (j = i + 1; j < nvec; j++) {
974			if (entries[i].entry == entries[j].entry)
975				return -EINVAL;	/* duplicate entry */
976		}
977	}
978	temp = dev->irq;
979	WARN_ON(!msi_lookup_irq(dev, PCI_CAP_ID_MSIX));
980
981	/* Check whether the driver already requested an MSI irq */
982   	if (pci_find_capability(dev, PCI_CAP_ID_MSI) > 0 &&
983		!msi_lookup_irq(dev, PCI_CAP_ID_MSI)) {
984		printk(KERN_INFO "PCI: %s: Can't enable MSI-X.  "
985		       "Device already has an MSI irq assigned\n",
986		       pci_name(dev));
987		dev->irq = temp;
988		return -EINVAL;
989	}
990	status = msix_capability_init(dev, entries, nvec);
991	return status;
992}
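
/*
 * A minimal sketch of the retry convention described above (illustrative
 * only; foo_setup_msix() and the caller-supplied foo_msix[] array are
 * hypothetical): a positive return value is the number of irqs the driver
 * may ask for again, 0 means success and a negative value means failure.
 *
 *	static int foo_setup_msix(struct pci_dev *pdev,
 *				  struct msix_entry *foo_msix, int nvec)
 *	{
 *		int i, rc;
 *
 *		for (i = 0; i < nvec; i++)
 *			foo_msix[i].entry = i;
 *
 *		rc = pci_enable_msix(pdev, foo_msix, nvec);
 *		while (rc > 0)
 *			rc = pci_enable_msix(pdev, foo_msix, rc);
 *
 *		return rc;
 *	}
 */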
993
994void pci_disable_msix(struct pci_dev* dev)
995{
996	int pos, temp;
997	u16 control;
998
999	if (!pci_msi_enable)
1000		return;
1001	if (!dev)
1002		return;
1003
1004	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
1005	if (!pos)
1006		return;
1007
1008	pci_read_config_word(dev, msi_control_reg(pos), &control);
1009	if (!(control & PCI_MSIX_FLAGS_ENABLE))
1010		return;
1011
1012	disable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
1013
1014	temp = dev->irq;
1015	if (!msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
1016		int irq, head, tail = 0, warning = 0;
1017		unsigned long flags;
1018
1019		irq = head = dev->irq;
1020		dev->irq = temp;			/* Restore pin IRQ */
1021		while (head != tail) {
1022			spin_lock_irqsave(&msi_lock, flags);
1023			tail = msi_desc[irq]->link.tail;
1024			spin_unlock_irqrestore(&msi_lock, flags);
1025			if (irq_has_action(irq))
1026				warning = 1;
1027			else if (irq != head)	/* Release MSI-X irq */
1028				msi_free_irq(dev, irq);
1029			irq = tail;
1030		}
1031		msi_free_irq(dev, irq);
1032		if (warning) {
1033			printk(KERN_WARNING "PCI: %s: pci_disable_msix() called without "
1034			       "free_irq() on all MSI-X irqs\n",
1035			       pci_name(dev));
1036			BUG_ON(warning > 0);
1037		}
1038	}
1039}
1040
1041/**
1042 * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
1043 * @dev: pointer to the pci_dev data structure of MSI(X) device function
1044 *
1045 * Called during hotplug removal, when the device function is being
1046 * hot-removed. All previously assigned MSI/MSI-X irqs, if any were
1047 * allocated for this device function, are reclaimed to the unused state
1048 * so that they may be reused later.
1049 **/
1050void msi_remove_pci_irq_vectors(struct pci_dev* dev)
1051{
1052	int pos, temp;
1053	unsigned long flags;
1054
1055	if (!pci_msi_enable || !dev)
1056 		return;
1057
1058	temp = dev->irq;		/* Save IOAPIC IRQ */
1059	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
1060	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSI)) {
1061		if (irq_has_action(dev->irq)) {
1062			printk(KERN_WARNING "PCI: %s: msi_remove_pci_irq_vectors() "
1063			       "called without free_irq() on MSI irq %d\n",
1064			       pci_name(dev), dev->irq);
1065			BUG_ON(irq_has_action(dev->irq));
1066		} else /* Release MSI irq assigned to this device */
1067			msi_free_irq(dev, dev->irq);
1068		dev->irq = temp;		/* Restore IOAPIC IRQ */
1069	}
1070	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
1071	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
1072		int irq, head, tail = 0, warning = 0;
1073		void __iomem *base = NULL;
1074
1075		irq = head = dev->irq;
1076		while (head != tail) {
1077			spin_lock_irqsave(&msi_lock, flags);
1078			tail = msi_desc[irq]->link.tail;
1079			base = msi_desc[irq]->mask_base;
1080			spin_unlock_irqrestore(&msi_lock, flags);
1081			if (irq_has_action(irq))
1082				warning = 1;
1083			else if (irq != head) /* Release MSI-X irq */
1084				msi_free_irq(dev, irq);
1085			irq = tail;
1086		}
1087		msi_free_irq(dev, irq);
1088		if (warning) {
1089			iounmap(base);
1090			printk(KERN_WARNING "PCI: %s: msi_remove_pci_irq_vectors() "
1091			       "called without free_irq() on all MSI-X irqs\n",
1092			       pci_name(dev));
1093			BUG_ON(warning > 0);
1094		}
1095		dev->irq = temp;		/* Restore IOAPIC IRQ */
1096	}
1097}
1098
1099void pci_no_msi(void)
1100{
1101	pci_msi_enable = 0;
1102}
1103
1104EXPORT_SYMBOL(pci_enable_msi);
1105EXPORT_SYMBOL(pci_disable_msi);
1106EXPORT_SYMBOL(pci_enable_msix);
1107EXPORT_SYMBOL(pci_disable_msix);
1108