msi.c revision 1ce03373a7f4b5fa8ca5be02ff35229800a6e12b
/*
 * File:	msi.c
 * Purpose:	PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 */

#include <linux/err.h>
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/smp_lock.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>

#include <asm/errno.h>
#include <asm/io.h>
#include <asm/smp.h>

#include "pci.h"
#include "msi.h"

static DEFINE_SPINLOCK(msi_lock);
static struct msi_desc* msi_desc[NR_IRQS] = { [0 ... NR_IRQS-1] = NULL };
static kmem_cache_t* msi_cachep;

static int pci_msi_enable = 1;

static struct msi_ops *msi_ops;

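/*
 * Architecture code registers its MSI backend (the setup/teardown/target
 * callbacks in struct msi_ops) through msi_register() before any device
 * enables MSI; msi_init() below refuses to enable MSI if no ops have been
 * registered.
 */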
int
msi_register(struct msi_ops *ops)
{
	msi_ops = ops;
	return 0;
}

static int msi_cache_init(void)
{
	msi_cachep = kmem_cache_create("msi_cache", sizeof(struct msi_desc),
					0, SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!msi_cachep)
		return -ENOMEM;

	return 0;
}

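/*
 * Mask (flag != 0) or unmask (flag == 0) a single MSI/MSI-X irq.  For MSI
 * the per-vector mask bit lives in the function's configuration space and
 * mask_base holds the config offset; for MSI-X it lives in the
 * memory-mapped vector table and mask_base is the ioremapped table base.
 */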
static void msi_set_mask_bit(unsigned int irq, int flag)
{
	struct msi_desc *entry;

	entry = msi_desc[irq];
	if (!entry || !entry->dev || !entry->mask_base)
		return;
	switch (entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
		int		pos;
		u32		mask_bits;

		pos = (long)entry->mask_base;
		pci_read_config_dword(entry->dev, pos, &mask_bits);
		mask_bits &= ~(1);
		mask_bits |= flag;
		pci_write_config_dword(entry->dev, pos, mask_bits);
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
		int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
		writel(flag, entry->mask_base + offset);
		break;
	}
	default:
		break;
	}
}

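/*
 * Read back the message (address/data pair) currently programmed into an
 * MSI or MSI-X vector: from the capability registers in config space for
 * MSI, or from the vector table entry for MSI-X.
 */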
static void read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	switch (entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
		struct pci_dev *dev = entry->dev;
		int pos = entry->msi_attrib.pos;
		u16 data;

		pci_read_config_dword(dev, msi_lower_address_reg(pos),
					&msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_read_config_dword(dev, msi_upper_address_reg(pos),
						&msg->address_hi);
			pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
		} else {
			msg->address_hi = 0;
			pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
		}
		msg->data = data;
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
		break;
	}
	default:
		BUG();
	}
}

static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	switch (entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
		struct pci_dev *dev = entry->dev;
		int pos = entry->msi_attrib.pos;

		pci_write_config_dword(dev, msi_lower_address_reg(pos),
					msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_write_config_dword(dev, msi_upper_address_reg(pos),
						msg->address_hi);
			pci_write_config_word(dev, msi_data_reg(pos, 1),
						msg->data);
		} else {
			pci_write_config_word(dev, msi_data_reg(pos, 0),
						msg->data);
		}
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		writel(msg->address_lo,
			base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
		writel(msg->address_hi,
			base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
		break;
	}
	default:
		BUG();
	}
}

#ifdef CONFIG_SMP
static void set_msi_affinity(unsigned int irq, cpumask_t cpu_mask)
{
	struct msi_desc *entry;
	struct msi_msg msg;

	entry = msi_desc[irq];
	if (!entry || !entry->dev)
		return;

	read_msi_msg(entry, &msg);
	msi_ops->target(irq, cpu_mask, &msg);
	write_msi_msg(entry, &msg);
	set_native_irq_info(irq, cpu_mask);
}
#else
#define set_msi_affinity NULL
#endif /* CONFIG_SMP */

static void mask_MSI_irq(unsigned int irq)
{
	msi_set_mask_bit(irq, 1);
}

static void unmask_MSI_irq(unsigned int irq)
{
	msi_set_mask_bit(irq, 0);
}

static unsigned int startup_msi_irq_wo_maskbit(unsigned int irq)
{
	struct msi_desc *entry;
	unsigned long flags;

	spin_lock_irqsave(&msi_lock, flags);
	entry = msi_desc[irq];
	if (!entry || !entry->dev) {
		spin_unlock_irqrestore(&msi_lock, flags);
		return 0;
	}
	entry->msi_attrib.state = 1;	/* Mark it active */
	spin_unlock_irqrestore(&msi_lock, flags);

	return 0;	/* never anything pending */
}

static unsigned int startup_msi_irq_w_maskbit(unsigned int irq)
{
	startup_msi_irq_wo_maskbit(irq);
	unmask_MSI_irq(irq);
	return 0;	/* never anything pending */
}

static void shutdown_msi_irq(unsigned int irq)
{
	struct msi_desc *entry;
	unsigned long flags;

	spin_lock_irqsave(&msi_lock, flags);
	entry = msi_desc[irq];
	if (entry && entry->dev)
		entry->msi_attrib.state = 0;	/* Mark it not active */
	spin_unlock_irqrestore(&msi_lock, flags);
}

static void end_msi_irq_wo_maskbit(unsigned int irq)
{
	move_native_irq(irq);
	ack_APIC_irq();
}

static void end_msi_irq_w_maskbit(unsigned int irq)
{
	move_native_irq(irq);
	unmask_MSI_irq(irq);
	ack_APIC_irq();
}

static void do_nothing(unsigned int irq)
{
}

/*
 * Interrupt Type for MSI-X PCI/PCI-X/PCI-Express Devices,
 * which implement the MSI-X Capability Structure.
 */
static struct hw_interrupt_type msix_irq_type = {
	.typename	= "PCI-MSI-X",
	.startup	= startup_msi_irq_w_maskbit,
	.shutdown	= shutdown_msi_irq,
	.enable		= unmask_MSI_irq,
	.disable	= mask_MSI_irq,
	.ack		= mask_MSI_irq,
	.end		= end_msi_irq_w_maskbit,
	.set_affinity	= set_msi_affinity
};

/*
 * Interrupt Type for MSI PCI/PCI-X/PCI-Express Devices,
 * which implement the MSI Capability Structure with
 * Mask-and-Pending Bits.
 */
static struct hw_interrupt_type msi_irq_w_maskbit_type = {
	.typename	= "PCI-MSI",
	.startup	= startup_msi_irq_w_maskbit,
	.shutdown	= shutdown_msi_irq,
	.enable		= unmask_MSI_irq,
	.disable	= mask_MSI_irq,
	.ack		= mask_MSI_irq,
	.end		= end_msi_irq_w_maskbit,
	.set_affinity	= set_msi_affinity
};

/*
 * Interrupt Type for MSI PCI/PCI-X/PCI-Express Devices,
 * which implement the MSI Capability Structure without
 * Mask-and-Pending Bits.
 */
static struct hw_interrupt_type msi_irq_wo_maskbit_type = {
	.typename	= "PCI-MSI",
	.startup	= startup_msi_irq_wo_maskbit,
	.shutdown	= shutdown_msi_irq,
	.enable		= do_nothing,
	.disable	= do_nothing,
	.ack		= do_nothing,
	.end		= end_msi_irq_wo_maskbit,
	.set_affinity	= set_msi_affinity
};

static int msi_free_irq(struct pci_dev* dev, int irq);
static int msi_init(void)
{
	static int status = -ENOMEM;

	if (!status)
		return status;

	if (pci_msi_quirk) {
		pci_msi_enable = 0;
		printk(KERN_WARNING "PCI: MSI quirk detected. MSI disabled.\n");
		status = -EINVAL;
		return status;
	}

	status = msi_arch_init();
	if (status < 0) {
		pci_msi_enable = 0;
		printk(KERN_WARNING
		       "PCI: MSI arch init failed.  MSI disabled.\n");
		return status;
	}

	if (!msi_ops) {
		pci_msi_enable = 0;
		printk(KERN_WARNING
		       "PCI: MSI ops not registered. MSI disabled.\n");
		status = -EINVAL;
		return status;
	}

	status = msi_cache_init();
	if (status < 0) {
		pci_msi_enable = 0;
		printk(KERN_WARNING "PCI: MSI cache init failed\n");
		return status;
	}

	return status;
}

static struct msi_desc* alloc_msi_entry(void)
{
	struct msi_desc *entry;

	entry = kmem_cache_zalloc(msi_cachep, GFP_KERNEL);
	if (!entry)
		return NULL;

	entry->link.tail = entry->link.head = 0;	/* single message */
	entry->dev = NULL;

	return entry;
}

static void attach_msi_entry(struct msi_desc *entry, int irq)
{
	unsigned long flags;

	spin_lock_irqsave(&msi_lock, flags);
	msi_desc[irq] = entry;
	spin_unlock_irqrestore(&msi_lock, flags);
}

static int create_msi_irq(struct hw_interrupt_type *handler)
{
	struct msi_desc *entry;
	int irq;

	entry = alloc_msi_entry();
	if (!entry)
		return -ENOMEM;

	irq = create_irq();
	if (irq < 0) {
		kmem_cache_free(msi_cachep, entry);
		return -EBUSY;
	}

	set_irq_chip(irq, handler);
	set_irq_data(irq, entry);

	return irq;
}

static void destroy_msi_irq(unsigned int irq)
{
	struct msi_desc *entry;

	entry = get_irq_data(irq);
	set_irq_chip(irq, NULL);
	set_irq_data(irq, NULL);
	destroy_irq(irq);
	kmem_cache_free(msi_cachep, entry);
}

static void enable_msi_mode(struct pci_dev *dev, int pos, int type)
{
	u16 control;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (type == PCI_CAP_ID_MSI) {
		/* Set enabled bits to single MSI & enable MSI_enable bit */
		msi_enable(control, 1);
		pci_write_config_word(dev, msi_control_reg(pos), control);
		dev->msi_enabled = 1;
	} else {
		msix_enable(control);
		pci_write_config_word(dev, msi_control_reg(pos), control);
		dev->msix_enabled = 1;
	}
	if (pci_find_capability(dev, PCI_CAP_ID_EXP)) {
		/* PCI Express Endpoint device detected */
		pci_intx(dev, 0);  /* disable intx */
	}
}

void disable_msi_mode(struct pci_dev *dev, int pos, int type)
{
	u16 control;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (type == PCI_CAP_ID_MSI) {
		/* Clear the MSI_enable bit to disable MSI */
		msi_disable(control);
		pci_write_config_word(dev, msi_control_reg(pos), control);
		dev->msi_enabled = 0;
	} else {
		msix_disable(control);
		pci_write_config_word(dev, msi_control_reg(pos), control);
		dev->msix_enabled = 0;
	}
	if (pci_find_capability(dev, PCI_CAP_ID_EXP)) {
		/* PCI Express Endpoint device detected */
		pci_intx(dev, 1);  /* enable intx */
	}
}

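/*
 * Look up an already-allocated MSI/MSI-X irq of the given type for this
 * device.  Returns 0 and overrides dev->irq with the found irq, or
 * -EACCES if no matching descriptor exists.
 */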
static int msi_lookup_irq(struct pci_dev *dev, int type)
{
	int irq;
	unsigned long flags;

	spin_lock_irqsave(&msi_lock, flags);
	for (irq = 0; irq < NR_IRQS; irq++) {
		if (!msi_desc[irq] || msi_desc[irq]->dev != dev ||
			msi_desc[irq]->msi_attrib.type != type ||
			msi_desc[irq]->msi_attrib.default_irq != dev->irq)
			continue;
		spin_unlock_irqrestore(&msi_lock, flags);
		/* A pre-assigned MSI irq for this device already
		   exists.  Override dev->irq with this irq */
		dev->irq = irq;
		return 0;
	}
	spin_unlock_irqrestore(&msi_lock, flags);

	return -EACCES;
}

void pci_scan_msi_device(struct pci_dev *dev)
{
	if (!dev)
		return;
}

#ifdef CONFIG_PM
int pci_save_msi_state(struct pci_dev *dev)
{
	int pos, i = 0;
	u16 control;
	struct pci_cap_saved_state *save_state;
	u32 *cap;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (pos <= 0 || dev->no_msi)
		return 0;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSI_FLAGS_ENABLE))
		return 0;

	save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u32) * 5,
		GFP_KERNEL);
	if (!save_state) {
		printk(KERN_ERR "Out of memory in pci_save_msi_state\n");
		return -ENOMEM;
	}
	cap = &save_state->data[0];

	pci_read_config_dword(dev, pos, &cap[i++]);
	control = cap[0] >> 16;
	pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, &cap[i++]);
	if (control & PCI_MSI_FLAGS_64BIT) {
		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, &cap[i++]);
		pci_read_config_dword(dev, pos + PCI_MSI_DATA_64, &cap[i++]);
	} else
		pci_read_config_dword(dev, pos + PCI_MSI_DATA_32, &cap[i++]);
	if (control & PCI_MSI_FLAGS_MASKBIT)
		pci_read_config_dword(dev, pos + PCI_MSI_MASK_BIT, &cap[i++]);
	save_state->cap_nr = PCI_CAP_ID_MSI;
	pci_add_saved_cap(dev, save_state);
	return 0;
}

void pci_restore_msi_state(struct pci_dev *dev)
{
	int i = 0, pos;
	u16 control;
	struct pci_cap_saved_state *save_state;
	u32 *cap;

	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_MSI);
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!save_state || pos <= 0)
		return;
	cap = &save_state->data[0];

	control = cap[i++] >> 16;
	pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, cap[i++]);
	if (control & PCI_MSI_FLAGS_64BIT) {
		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, cap[i++]);
		pci_write_config_dword(dev, pos + PCI_MSI_DATA_64, cap[i++]);
	} else
		pci_write_config_dword(dev, pos + PCI_MSI_DATA_32, cap[i++]);
	if (control & PCI_MSI_FLAGS_MASKBIT)
		pci_write_config_dword(dev, pos + PCI_MSI_MASK_BIT, cap[i++]);
	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
	pci_remove_saved_cap(save_state);
	kfree(save_state);
}

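/*
 * The MSI-X vector table lives in device memory and may be lost across a
 * power transition, so each vector's message is stashed in its msi_desc
 * (msg_save) here and written back in pci_restore_msix_state().
 */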
int pci_save_msix_state(struct pci_dev *dev)
{
	int pos;
	int temp;
	int irq, head, tail = 0;
	u16 control;
	struct pci_cap_saved_state *save_state;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (pos <= 0 || dev->no_msi)
		return 0;

	/* save the capability */
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSIX_FLAGS_ENABLE))
		return 0;
	save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u16),
		GFP_KERNEL);
	if (!save_state) {
		printk(KERN_ERR "Out of memory in pci_save_msix_state\n");
		return -ENOMEM;
	}
	*((u16 *)&save_state->data[0]) = control;

	/* save the table */
	temp = dev->irq;
	if (msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
		kfree(save_state);
		return -EINVAL;
	}

	irq = head = dev->irq;
	while (head != tail) {
		struct msi_desc *entry;

		entry = msi_desc[irq];
		read_msi_msg(entry, &entry->msg_save);

		tail = msi_desc[irq]->link.tail;
		irq = tail;
	}
	dev->irq = temp;

	save_state->cap_nr = PCI_CAP_ID_MSIX;
	pci_add_saved_cap(dev, save_state);
	return 0;
}

void pci_restore_msix_state(struct pci_dev *dev)
{
	u16 save;
	int pos;
	int irq, head, tail = 0;
	struct msi_desc *entry;
	int temp;
	struct pci_cap_saved_state *save_state;

	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_MSIX);
	if (!save_state)
		return;
	save = *((u16 *)&save_state->data[0]);
	pci_remove_saved_cap(save_state);
	kfree(save_state);

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (pos <= 0)
		return;

	/* route the table */
	temp = dev->irq;
	if (msi_lookup_irq(dev, PCI_CAP_ID_MSIX))
		return;
	irq = head = dev->irq;
	while (head != tail) {
		entry = msi_desc[irq];
		write_msi_msg(entry, &entry->msg_save);

		tail = msi_desc[irq]->link.tail;
		irq = tail;
	}
	dev->irq = temp;

	pci_write_config_word(dev, msi_control_reg(pos), save);
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
}
#endif

static int msi_register_init(struct pci_dev *dev, struct msi_desc *entry)
{
	int status;
	struct msi_msg msg;
	int pos;
	u16 control;

	pos = entry->msi_attrib.pos;
	pci_read_config_word(dev, msi_control_reg(pos), &control);

	/* Configure MSI capability structure */
	status = msi_ops->setup(dev, dev->irq, &msg);
	if (status < 0)
		return status;

	write_msi_msg(entry, &msg);
	if (entry->msi_attrib.maskbit) {
		unsigned int maskbits, temp;
		/* All MSIs are unmasked by default; mask them all */
		pci_read_config_dword(dev,
			msi_mask_bits_reg(pos, is_64bit_address(control)),
			&maskbits);
		temp = (1 << multi_msi_capable(control));
		temp = ((temp - 1) & ~temp);
		maskbits |= temp;
		pci_write_config_dword(dev,
			msi_mask_bits_reg(pos, is_64bit_address(control)),
			maskbits);
	}

	return 0;
}

/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * Set up the MSI capability structure of the device function with a
 * single MSI irq, regardless of whether the device function is capable
 * of handling multiple messages. A return of zero indicates successful
 * setup of entry zero with the new MSI irq; a non-zero return indicates
 * failure.
 **/
static int msi_capability_init(struct pci_dev *dev)
{
	int status;
	struct msi_desc *entry;
	int pos, irq;
	u16 control;
	struct hw_interrupt_type *handler;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	/* MSI Entry Initialization */
	handler = &msi_irq_wo_maskbit_type;
	if (is_mask_bit_support(control))
		handler = &msi_irq_w_maskbit_type;

	irq = create_msi_irq(handler);
	if (irq < 0)
		return irq;

	entry = get_irq_data(irq);
	entry->link.head = irq;
	entry->link.tail = irq;
	entry->msi_attrib.type = PCI_CAP_ID_MSI;
	entry->msi_attrib.state = 0;			/* Mark it not active */
	entry->msi_attrib.is_64 = is_64bit_address(control);
	entry->msi_attrib.entry_nr = 0;
	entry->msi_attrib.maskbit = is_mask_bit_support(control);
	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
	entry->msi_attrib.pos = pos;
	dev->irq = irq;
	entry->dev = dev;
	if (is_mask_bit_support(control)) {
		entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
				is_64bit_address(control));
	}
	/* Configure MSI capability structure */
	status = msi_register_init(dev, entry);
	if (status != 0) {
		dev->irq = entry->msi_attrib.default_irq;
		destroy_msi_irq(irq);
		return status;
	}

	attach_msi_entry(entry, irq);
	/* Set MSI enabled bits */
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);

	return 0;
}

/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
 *
 * Set up the MSI-X capability structure of the device function with the
 * requested number of MSI-X irqs. A return of zero indicates successful
 * setup of the requested MSI-X entries with allocated irqs; a non-zero
 * return indicates failure.
 **/
static int msix_capability_init(struct pci_dev *dev,
				struct msix_entry *entries, int nvec)
{
	struct msi_desc *head = NULL, *tail = NULL, *entry = NULL;
	struct msi_msg msg;
	int status;
	int irq, pos, i, j, nr_entries, temp = 0;
	unsigned long phys_addr;
	u32 table_offset;
	u16 control;
	u8 bir;
	void __iomem *base;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	/* Request & Map MSI-X table region */
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	nr_entries = multi_msix_capable(control);

	pci_read_config_dword(dev, msix_table_offset_reg(pos), &table_offset);
	bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
	table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
	phys_addr = pci_resource_start(dev, bir) + table_offset;
	base = ioremap_nocache(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
	if (base == NULL)
		return -ENOMEM;

	/* MSI-X Table Initialization */
	for (i = 0; i < nvec; i++) {
		irq = create_msi_irq(&msix_irq_type);
		if (irq < 0)
			break;

		entry = get_irq_data(irq);
		j = entries[i].entry;
		entries[i].vector = irq;
		entry->msi_attrib.type = PCI_CAP_ID_MSIX;
		entry->msi_attrib.state = 0;		/* Mark it not active */
		entry->msi_attrib.is_64 = 1;
		entry->msi_attrib.entry_nr = j;
		entry->msi_attrib.maskbit = 1;
		entry->msi_attrib.default_irq = dev->irq;
		entry->msi_attrib.pos = pos;
		entry->dev = dev;
		entry->mask_base = base;
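		/*
		 * Link the new descriptor into the device's circular list
		 * of MSI-X descriptors: link.tail points at the next irq
		 * and link.head at the previous one, so the whole set can
		 * be walked starting from dev->irq.
		 */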
		if (!head) {
			entry->link.head = irq;
			entry->link.tail = irq;
			head = entry;
		} else {
			entry->link.head = temp;
			entry->link.tail = tail->link.tail;
			tail->link.tail = irq;
			head->link.head = irq;
		}
		temp = irq;
		tail = entry;
		/* Configure MSI-X capability structure */
		status = msi_ops->setup(dev, irq, &msg);
		if (status < 0) {
			destroy_msi_irq(irq);
			break;
		}

		write_msi_msg(entry, &msg);
		attach_msi_entry(entry, irq);
	}
	if (i != nvec) {
		int avail = i - 1;
		i--;
		for (; i >= 0; i--) {
			irq = (entries + i)->vector;
			msi_free_irq(dev, irq);
			(entries + i)->vector = 0;
		}
		/* If we had some success, report the number of irqs
		 * we succeeded in setting up.
		 */
		if (avail <= 0)
			avail = -EBUSY;
		return avail;
	}
	/* Set MSI-X enabled bits */
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);

	return 0;
}

/**
 * pci_msi_supported - check whether MSI may be enabled on device
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * MSI must be globally enabled and supported by the device and its root
 * bus. But the root bus is not easy to find, since some architectures
 * have virtual busses on top of the PCI hierarchy (for instance the
 * HyperTransport bus), while the actual bus where MSI must be supported
 * is below. So we test the MSI flag on all parent busses and assume
 * that no quirk will ever set the NO_MSI flag on a non-root bus.
 **/
static
int pci_msi_supported(struct pci_dev *dev)
{
	struct pci_bus *bus;

	if (!pci_msi_enable || !dev || dev->no_msi)
		return -EINVAL;

	/* check MSI flags of all parent busses */
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return -EINVAL;

	return 0;
}

/**
 * pci_enable_msi - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * Set up the MSI capability structure of the device function with a
 * single MSI irq when its driver requests that MSI mode be enabled on
 * the hardware device function. A return of zero indicates successful
 * setup of entry zero with the new MSI irq; a non-zero return indicates
 * failure.
 **/
int pci_enable_msi(struct pci_dev* dev)
{
	int pos, temp, status;
	u16 control;

	if (pci_msi_supported(dev) < 0)
		return -EINVAL;

	temp = dev->irq;

	status = msi_init();
	if (status < 0)
		return status;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!pos)
		return -EINVAL;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!is_64bit_address(control) && msi_ops->needs_64bit_address)
		return -EINVAL;

	WARN_ON(!msi_lookup_irq(dev, PCI_CAP_ID_MSI));

	/* Check whether driver already requested for MSI-X irqs */
	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
		printk(KERN_INFO "PCI: %s: Can't enable MSI.  "
		       "Device already has MSI-X irq assigned\n",
		       pci_name(dev));
		dev->irq = temp;
		return -EINVAL;
	}
	status = msi_capability_init(dev);
	return status;
}
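/*
 * Example use of pci_enable_msi() from a device driver (an illustrative
 * sketch only, not part of this file; "my_isr" and "mydev" are made-up
 * names):
 *
 *	if (pci_enable_msi(pdev))
 *		printk(KERN_INFO "mydev: MSI unavailable, using INTx\n");
 *	rc = request_irq(pdev->irq, my_isr, 0, "mydev", mydev);
 *
 * Whether or not MSI was enabled, dev->irq holds the irq to pass to
 * request_irq(); on failure it is left at the pin-based default.
 */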

void pci_disable_msi(struct pci_dev* dev)
{
	struct msi_desc *entry;
	int pos, default_irq;
	u16 control;
	unsigned long flags;

	if (!pci_msi_enable)
		return;
	if (!dev)
		return;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!pos)
		return;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSI_FLAGS_ENABLE))
		return;

	disable_msi_mode(dev, pos, PCI_CAP_ID_MSI);

	spin_lock_irqsave(&msi_lock, flags);
	entry = msi_desc[dev->irq];
	if (!entry || !entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) {
		spin_unlock_irqrestore(&msi_lock, flags);
		return;
	}
	if (entry->msi_attrib.state) {
		spin_unlock_irqrestore(&msi_lock, flags);
		printk(KERN_WARNING "PCI: %s: pci_disable_msi() called without "
		       "free_irq() on MSI irq %d\n",
		       pci_name(dev), dev->irq);
		BUG_ON(entry->msi_attrib.state > 0);
	} else {
		default_irq = entry->msi_attrib.default_irq;
		spin_unlock_irqrestore(&msi_lock, flags);
		msi_free_irq(dev, dev->irq);

		/* Restore dev->irq to its default pin-assertion irq */
		dev->irq = default_irq;
	}
}

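/*
 * Release one MSI/MSI-X irq: tear down the arch-specific routing, unlink
 * the descriptor from the device's list and free it.  For MSI-X the
 * vector is also masked in the table, and the table mapping is unmapped
 * once the last descriptor of the device is freed (head == irq).
 */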
static int msi_free_irq(struct pci_dev* dev, int irq)
{
	struct msi_desc *entry;
	int head, entry_nr, type;
	void __iomem *base;
	unsigned long flags;

	msi_ops->teardown(irq);

	spin_lock_irqsave(&msi_lock, flags);
	entry = msi_desc[irq];
	if (!entry || entry->dev != dev) {
		spin_unlock_irqrestore(&msi_lock, flags);
		return -EINVAL;
	}
	type = entry->msi_attrib.type;
	entry_nr = entry->msi_attrib.entry_nr;
	head = entry->link.head;
	base = entry->mask_base;
	msi_desc[entry->link.head]->link.tail = entry->link.tail;
	msi_desc[entry->link.tail]->link.head = entry->link.head;
	entry->dev = NULL;
	msi_desc[irq] = NULL;
	spin_unlock_irqrestore(&msi_lock, flags);

	destroy_msi_irq(irq);

	if (type == PCI_CAP_ID_MSIX) {
		writel(1, base + entry_nr * PCI_MSIX_ENTRY_SIZE +
			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);

		if (head == irq)
			iounmap(base);
	}

	return 0;
}

/**
 * pci_enable_msix - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of MSI-X entries
 * @nvec: number of MSI-X irqs requested for allocation by device driver
 *
 * Set up the MSI-X capability structure of the device function with the
 * requested number of irqs when its driver requests that MSI-X mode be
 * enabled on the hardware device function. A return of zero indicates
 * successful configuration of the MSI-X capability structure with newly
 * allocated MSI-X irqs. A return of < 0 indicates a failure. A return
 * of > 0 indicates that the request exceeds the number of irqs
 * available; the driver should use the returned value to retry with a
 * smaller request.
 **/
int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
{
	int status, pos, nr_entries;
	int i, j, temp;
	u16 control;

	if (!entries || pci_msi_supported(dev) < 0)
		return -EINVAL;

	status = msi_init();
	if (status < 0)
		return status;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (!pos)
		return -EINVAL;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	nr_entries = multi_msix_capable(control);
	if (nvec > nr_entries)
		return -EINVAL;

	/* Check for any invalid entries */
	for (i = 0; i < nvec; i++) {
		if (entries[i].entry >= nr_entries)
			return -EINVAL;		/* invalid entry */
		for (j = i + 1; j < nvec; j++) {
			if (entries[i].entry == entries[j].entry)
				return -EINVAL;	/* duplicate entry */
		}
	}
	temp = dev->irq;
	WARN_ON(!msi_lookup_irq(dev, PCI_CAP_ID_MSIX));

	/* Check whether driver already requested for MSI irq */
	if (pci_find_capability(dev, PCI_CAP_ID_MSI) > 0 &&
		!msi_lookup_irq(dev, PCI_CAP_ID_MSI)) {
		printk(KERN_INFO "PCI: %s: Can't enable MSI-X.  "
		       "Device already has an MSI irq assigned\n",
		       pci_name(dev));
		dev->irq = temp;
		return -EINVAL;
	}
	status = msix_capability_init(dev, entries, nvec);
	return status;
}
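/*
 * Example use of pci_enable_msix() from a device driver (an illustrative
 * sketch only, not part of this file): a positive return value reports
 * how many irqs are actually available, so the driver may retry with a
 * smaller request.
 *
 *	for (i = 0; i < nvec; i++)
 *		msix_entries[i].entry = i;
 *	rc = pci_enable_msix(pdev, msix_entries, nvec);
 *	if (rc > 0)
 *		rc = pci_enable_msix(pdev, msix_entries, rc);
 *
 * On success each msix_entries[i].vector holds the irq to pass to
 * request_irq() for that table entry.
 */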

void pci_disable_msix(struct pci_dev* dev)
{
	int pos, temp;
	u16 control;

	if (!pci_msi_enable)
		return;
	if (!dev)
		return;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (!pos)
		return;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSIX_FLAGS_ENABLE))
		return;

	disable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);

	temp = dev->irq;
	if (!msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
		int state, irq, head, tail = 0, warning = 0;
		unsigned long flags;

		irq = head = dev->irq;
		dev->irq = temp;			/* Restore pin IRQ */
		while (head != tail) {
			spin_lock_irqsave(&msi_lock, flags);
			state = msi_desc[irq]->msi_attrib.state;
			tail = msi_desc[irq]->link.tail;
			spin_unlock_irqrestore(&msi_lock, flags);
			if (state)
				warning = 1;
			else if (irq != head)	/* Release MSI-X irq */
				msi_free_irq(dev, irq);
			irq = tail;
		}
		msi_free_irq(dev, irq);
		if (warning) {
			printk(KERN_WARNING "PCI: %s: pci_disable_msix() called without "
			       "free_irq() on all MSI-X irqs\n",
			       pci_name(dev));
			BUG_ON(warning > 0);
		}
	}
}

/**
 * msi_remove_pci_irq_vectors - reclaim MSI/MSI-X irqs to unused state
 * @dev: pointer to the pci_dev data structure of MSI(X) device function
 *
 * Called during hot-plug removal of the device function. All MSI/MSI-X
 * irqs previously assigned to this device function, if any, are
 * reclaimed to the unused state so they may be reused later.
 **/
void msi_remove_pci_irq_vectors(struct pci_dev* dev)
{
	int state, pos, temp;
	unsigned long flags;

	if (!pci_msi_enable || !dev)
		return;

	temp = dev->irq;		/* Save IOAPIC IRQ */
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSI)) {
		spin_lock_irqsave(&msi_lock, flags);
		state = msi_desc[dev->irq]->msi_attrib.state;
		spin_unlock_irqrestore(&msi_lock, flags);
		if (state) {
			printk(KERN_WARNING "PCI: %s: msi_remove_pci_irq_vectors() "
			       "called without free_irq() on MSI irq %d\n",
			       pci_name(dev), dev->irq);
			BUG_ON(state > 0);
		} else /* Release MSI irq assigned to this device */
			msi_free_irq(dev, dev->irq);
		dev->irq = temp;		/* Restore IOAPIC IRQ */
	}
	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
		int irq, head, tail = 0, warning = 0;
		void __iomem *base = NULL;

		irq = head = dev->irq;
		while (head != tail) {
			spin_lock_irqsave(&msi_lock, flags);
			state = msi_desc[irq]->msi_attrib.state;
			tail = msi_desc[irq]->link.tail;
			base = msi_desc[irq]->mask_base;
			spin_unlock_irqrestore(&msi_lock, flags);
			if (state)
				warning = 1;
			else if (irq != head) /* Release MSI-X irq */
				msi_free_irq(dev, irq);
			irq = tail;
		}
		msi_free_irq(dev, irq);
		if (warning) {
			iounmap(base);
			printk(KERN_WARNING "PCI: %s: msi_remove_pci_irq_vectors() "
			       "called without free_irq() on all MSI-X irqs\n",
			       pci_name(dev));
			BUG_ON(warning > 0);
		}
		dev->irq = temp;		/* Restore IOAPIC IRQ */
	}
}

void pci_no_msi(void)
{
	pci_msi_enable = 0;
}

EXPORT_SYMBOL(pci_enable_msi);
EXPORT_SYMBOL(pci_disable_msi);
EXPORT_SYMBOL(pci_enable_msix);
EXPORT_SYMBOL(pci_disable_msix);
