netpoll.c revision 2cde6acd49daca58b96f1fbc697492825511ad31
1/*
2 * Common framework for low-level network console, dump, and debugger code
3 *
4 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
5 *
6 * based on the netconsole code from:
7 *
8 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
9 * Copyright (C) 2002  Red Hat, Inc.
10 */
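/*
 * Rough usage sketch (illustrative only; netconsole is the canonical
 * in-tree user):
 *
 *	static struct netpoll np = { .name = "myclient" };
 *
 *	netpoll_parse_options(&np, config_string);
 *	netpoll_setup(&np);
 *	netpoll_send_udp(&np, msg, len);
 *
 * netpoll_parse_options() fills in the ports, addresses and device name,
 * netpoll_setup() binds np to the device, and netpoll_send_udp() can then
 * be called from contexts where the regular network stack is unusable.
 */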
11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
14#include <linux/moduleparam.h>
15#include <linux/netdevice.h>
16#include <linux/etherdevice.h>
17#include <linux/string.h>
18#include <linux/if_arp.h>
19#include <linux/inetdevice.h>
20#include <linux/inet.h>
21#include <linux/interrupt.h>
22#include <linux/netpoll.h>
23#include <linux/sched.h>
24#include <linux/delay.h>
25#include <linux/rcupdate.h>
26#include <linux/workqueue.h>
27#include <linux/slab.h>
28#include <linux/export.h>
29#include <linux/if_vlan.h>
30#include <net/tcp.h>
31#include <net/udp.h>
32#include <net/addrconf.h>
33#include <net/ndisc.h>
34#include <net/ip6_checksum.h>
35#include <asm/unaligned.h>
36#include <trace/events/napi.h>
37
38/*
39 * We maintain a small pool of fully-sized skbs, to make sure the
40 * message gets out even in extreme OOM situations.
41 */
42
43#define MAX_UDP_CHUNK 1460
44#define MAX_SKBS 32
45
46static struct sk_buff_head skb_pool;
47
48static atomic_t trapped;
49
50static struct srcu_struct netpoll_srcu;
51
52#define USEC_PER_POLL	50
53#define NETPOLL_RX_ENABLED  1
54#define NETPOLL_RX_DROP     2
55
56#define MAX_SKB_SIZE							\
57	(sizeof(struct ethhdr) +					\
58	 sizeof(struct iphdr) +						\
59	 sizeof(struct udphdr) +					\
60	 MAX_UDP_CHUNK)
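/* Each pool skb is big enough for an Ethernet + IPv4 + UDP header plus one
 * MAX_UDP_CHUNK of payload; IPv6 and VLAN headers are not accounted for here.
 */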
61
62static void zap_completion_queue(void);
63static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
64static void netpoll_async_cleanup(struct work_struct *work);
65
66static unsigned int carrier_timeout = 4;
67module_param(carrier_timeout, uint, 0644);
68
69#define np_info(np, fmt, ...)				\
70	pr_info("%s: " fmt, np->name, ##__VA_ARGS__)
71#define np_err(np, fmt, ...)				\
72	pr_err("%s: " fmt, np->name, ##__VA_ARGS__)
73#define np_notice(np, fmt, ...)				\
74	pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)
75
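/* Drain the deferred transmit queue from process context.  If the device
 * queue is frozen or stopped, or the driver rejects the skb, put it back
 * and retry a little later.
 */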
76static void queue_process(struct work_struct *work)
77{
78	struct netpoll_info *npinfo =
79		container_of(work, struct netpoll_info, tx_work.work);
80	struct sk_buff *skb;
81	unsigned long flags;
82
83	while ((skb = skb_dequeue(&npinfo->txq))) {
84		struct net_device *dev = skb->dev;
85		const struct net_device_ops *ops = dev->netdev_ops;
86		struct netdev_queue *txq;
87
88		if (!netif_device_present(dev) || !netif_running(dev)) {
89			__kfree_skb(skb);
90			continue;
91		}
92
93		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
94
95		local_irq_save(flags);
96		__netif_tx_lock(txq, smp_processor_id());
97		if (netif_xmit_frozen_or_stopped(txq) ||
98		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
99			skb_queue_head(&npinfo->txq, skb);
100			__netif_tx_unlock(txq);
101			local_irq_restore(flags);
102
103			schedule_delayed_work(&npinfo->tx_work, HZ/10);
104			return;
105		}
106		__netif_tx_unlock(txq);
107		local_irq_restore(flags);
108	}
109}
110
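/* Return 0 if the UDP checksum is known to be good (no checksum, already
 * verified by the hardware, or verified here), non-zero otherwise.
 */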
111static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
112			    unsigned short ulen, __be32 saddr, __be32 daddr)
113{
114	__wsum psum;
115
116	if (uh->check == 0 || skb_csum_unnecessary(skb))
117		return 0;
118
119	psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
120
121	if (skb->ip_summed == CHECKSUM_COMPLETE &&
122	    !csum_fold(csum_add(psum, skb->csum)))
123		return 0;
124
125	skb->csum = psum;
126
127	return __skb_checksum_complete(skb);
128}
129
130/*
131 * Check whether delayed processing was scheduled for our NIC. If so,
132 * we attempt to grab the poll lock and use ->poll() to pump the card.
133 * If this fails, either we've recursed in ->poll() or it's already
134 * running on another CPU.
135 *
136 * Note: we don't mask interrupts with this lock because we're using
137 * trylock here and interrupts are already disabled in the softirq
138 * case. Further, we test the poll_owner to avoid recursion on UP
139 * systems where the lock doesn't exist.
140 *
141 * In cases where there is bidirectional communication, reading only
142 * one message at a time can lead to packets being dropped by the
143 * network adapter, forcing superfluous retries and possibly timeouts.
144 * Thus, we set our budget to greater than 1.
145 */
146static int poll_one_napi(struct netpoll_info *npinfo,
147			 struct napi_struct *napi, int budget)
148{
149	int work;
150
151	/* net_rx_action's ->poll() invocations and ours are
152	 * synchronized by this test which is only made while
153	 * holding the napi->poll_lock.
154	 */
155	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
156		return budget;
157
158	npinfo->rx_flags |= NETPOLL_RX_DROP;
159	atomic_inc(&trapped);
160	set_bit(NAPI_STATE_NPSVC, &napi->state);
161
162	work = napi->poll(napi, budget);
163	trace_napi_poll(napi);
164
165	clear_bit(NAPI_STATE_NPSVC, &napi->state);
166	atomic_dec(&trapped);
167	npinfo->rx_flags &= ~NETPOLL_RX_DROP;
168
169	return budget - work;
170}
171
172static void poll_napi(struct net_device *dev)
173{
174	struct napi_struct *napi;
175	int budget = 16;
176
177	list_for_each_entry(napi, &dev->napi_list, dev_list) {
178		if (napi->poll_owner != smp_processor_id() &&
179		    spin_trylock(&napi->poll_lock)) {
180			budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
181					       napi, budget);
182			spin_unlock(&napi->poll_lock);
183
184			if (!budget)
185				break;
186		}
187	}
188}
189
190static void service_neigh_queue(struct netpoll_info *npi)
191{
192	if (npi) {
193		struct sk_buff *skb;
194
195		while ((skb = skb_dequeue(&npi->neigh_tx)))
196			netpoll_neigh_reply(skb, npi);
197	}
198}
199
200static void netpoll_poll_dev(struct net_device *dev)
201{
202	const struct net_device_ops *ops;
203	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
204
	/* Don't do any rx activity if the dev_lock mutex is held;
	 * the dev_open/close paths use this to block netpoll activity
	 * while changing device state.
	 */
	if (!ni || !mutex_trylock(&ni->dev_lock))
		return;

	if (!netif_running(dev)) {
		mutex_unlock(&ni->dev_lock);
		return;
	}

	ops = dev->netdev_ops;
	if (!ops->ndo_poll_controller) {
		mutex_unlock(&ni->dev_lock);
		return;
	}
218
219	/* Process pending work on NIC */
220	ops->ndo_poll_controller(dev);
221
222	poll_napi(dev);
223
224	mutex_unlock(&ni->dev_lock);
225
226	if (dev->flags & IFF_SLAVE) {
227		if (ni) {
228			struct net_device *bond_dev;
229			struct sk_buff *skb;
230			struct netpoll_info *bond_ni;
231
232			bond_dev = netdev_master_upper_dev_get_rcu(dev);
233			bond_ni = rcu_dereference_bh(bond_dev->npinfo);
234			while ((skb = skb_dequeue(&ni->neigh_tx))) {
235				skb->dev = bond_dev;
236				skb_queue_tail(&bond_ni->neigh_tx, skb);
237			}
238		}
239	}
240
241	service_neigh_queue(ni);
242
243	zap_completion_queue();
244}
245
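/* netpoll_rx_disable()/netpoll_rx_enable() are used by the dev_open/close
 * paths to block netpoll activity (see netpoll_poll_dev()) while the device
 * changes state: disable sleeps on the dev_lock mutex, enable releases it.
 */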
246int netpoll_rx_disable(struct net_device *dev)
247{
248	struct netpoll_info *ni;
249	int idx;
250	might_sleep();
251	idx = srcu_read_lock(&netpoll_srcu);
252	ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
253	if (ni)
254		mutex_lock(&ni->dev_lock);
255	srcu_read_unlock(&netpoll_srcu, idx);
256	return 0;
257}
258EXPORT_SYMBOL(netpoll_rx_disable);
259
260void netpoll_rx_enable(struct net_device *dev)
261{
262	struct netpoll_info *ni;
263	rcu_read_lock();
264	ni = rcu_dereference(dev->npinfo);
265	if (ni)
266		mutex_unlock(&ni->dev_lock);
267	rcu_read_unlock();
268}
269EXPORT_SYMBOL(netpoll_rx_enable);
270
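/* Top up the static skb pool to MAX_SKBS entries.  Allocations are
 * GFP_ATOMIC, so the pool may end up only partially filled.
 */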
271static void refill_skbs(void)
272{
273	struct sk_buff *skb;
274	unsigned long flags;
275
276	spin_lock_irqsave(&skb_pool.lock, flags);
277	while (skb_pool.qlen < MAX_SKBS) {
278		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
279		if (!skb)
280			break;
281
282		__skb_queue_tail(&skb_pool, skb);
283	}
284	spin_unlock_irqrestore(&skb_pool.lock, flags);
285}
286
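/* Flush this CPU's completion queue: skbs without destructors are freed
 * immediately so their memory can be reused, skbs with destructors are
 * handed back to dev_kfree_skb_any().
 */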
287static void zap_completion_queue(void)
288{
289	unsigned long flags;
290	struct softnet_data *sd = &get_cpu_var(softnet_data);
291
292	if (sd->completion_queue) {
293		struct sk_buff *clist;
294
295		local_irq_save(flags);
296		clist = sd->completion_queue;
297		sd->completion_queue = NULL;
298		local_irq_restore(flags);
299
300		while (clist != NULL) {
301			struct sk_buff *skb = clist;
302			clist = clist->next;
303			if (skb->destructor) {
304				atomic_inc(&skb->users);
305				dev_kfree_skb_any(skb); /* put this one back */
306			} else {
307				__kfree_skb(skb);
308			}
309		}
310	}
311
312	put_cpu_var(softnet_data);
313}
314
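/* Allocate an skb of @len bytes, falling back to the static pool and, if
 * that is empty as well, polling the device in the hope of reclaiming
 * memory; gives up after ten tries.  Returns NULL on failure.
 */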
315static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
316{
317	int count = 0;
318	struct sk_buff *skb;
319
320	zap_completion_queue();
321	refill_skbs();
322repeat:
323
324	skb = alloc_skb(len, GFP_ATOMIC);
325	if (!skb)
326		skb = skb_dequeue(&skb_pool);
327
328	if (!skb) {
329		if (++count < 10) {
330			netpoll_poll_dev(np->dev);
331			goto repeat;
332		}
333		return NULL;
334	}
335
336	atomic_set(&skb->users, 1);
337	skb_reserve(skb, reserve);
338	return skb;
339}
340
341static int netpoll_owner_active(struct net_device *dev)
342{
343	struct napi_struct *napi;
344
345	list_for_each_entry(napi, &dev->napi_list, dev_list) {
346		if (napi->poll_owner == smp_processor_id())
347			return 1;
348	}
349	return 0;
350}
351
352/* call with IRQs disabled */
353void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
354			     struct net_device *dev)
355{
356	int status = NETDEV_TX_BUSY;
357	unsigned long tries;
358	const struct net_device_ops *ops = dev->netdev_ops;
359	/* It is up to the caller to keep npinfo alive. */
360	struct netpoll_info *npinfo;
361
362	WARN_ON_ONCE(!irqs_disabled());
363
364	npinfo = rcu_dereference_bh(np->dev->npinfo);
365	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
366		__kfree_skb(skb);
367		return;
368	}
369
370	/* don't get messages out of order, and no recursion */
371	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
372		struct netdev_queue *txq;
373
374		txq = netdev_pick_tx(dev, skb);
375
376		/* try until next clock tick */
377		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
378		     tries > 0; --tries) {
379			if (__netif_tx_trylock(txq)) {
380				if (!netif_xmit_stopped(txq)) {
381					if (vlan_tx_tag_present(skb) &&
382					    !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) {
383						skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
384						if (unlikely(!skb))
385							break;
386						skb->vlan_tci = 0;
387					}
388
389					status = ops->ndo_start_xmit(skb, dev);
390					if (status == NETDEV_TX_OK)
391						txq_trans_update(txq);
392				}
393				__netif_tx_unlock(txq);
394
395				if (status == NETDEV_TX_OK)
396					break;
397
398			}
399
400			/* tickle the device, maybe there is some cleanup to do */
401			netpoll_poll_dev(np->dev);
402
403			udelay(USEC_PER_POLL);
404		}
405
406		WARN_ONCE(!irqs_disabled(),
407			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
408			dev->name, ops->ndo_start_xmit);
409
410	}
411
412	if (status != NETDEV_TX_OK) {
413		skb_queue_tail(&npinfo->txq, skb);
414		schedule_delayed_work(&npinfo->tx_work, 0);
415	}
416}
417EXPORT_SYMBOL(netpoll_send_skb_on_dev);
418
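/* Build Ethernet, IP (or IPv6) and UDP headers around @msg and send the
 * result with netpoll_send_skb().  Nothing here fragments the message, so
 * callers should keep @len within MAX_UDP_CHUNK (netconsole, for instance,
 * splits its output into chunks).
 */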
419void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
420{
421	int total_len, ip_len, udp_len;
422	struct sk_buff *skb;
423	struct udphdr *udph;
424	struct iphdr *iph;
425	struct ethhdr *eth;
426	static atomic_t ip_ident;
427	struct ipv6hdr *ip6h;
428
429	udp_len = len + sizeof(*udph);
430	if (np->ipv6)
431		ip_len = udp_len + sizeof(*ip6h);
432	else
433		ip_len = udp_len + sizeof(*iph);
434
435	total_len = ip_len + LL_RESERVED_SPACE(np->dev);
436
437	skb = find_skb(np, total_len + np->dev->needed_tailroom,
438		       total_len - len);
439	if (!skb)
440		return;
441
442	skb_copy_to_linear_data(skb, msg, len);
443	skb_put(skb, len);
444
445	skb_push(skb, sizeof(*udph));
446	skb_reset_transport_header(skb);
447	udph = udp_hdr(skb);
448	udph->source = htons(np->local_port);
449	udph->dest = htons(np->remote_port);
450	udph->len = htons(udp_len);
451
452	if (np->ipv6) {
453		udph->check = 0;
454		udph->check = csum_ipv6_magic(&np->local_ip.in6,
455					      &np->remote_ip.in6,
456					      udp_len, IPPROTO_UDP,
457					      csum_partial(udph, udp_len, 0));
458		if (udph->check == 0)
459			udph->check = CSUM_MANGLED_0;
460
461		skb_push(skb, sizeof(*ip6h));
462		skb_reset_network_header(skb);
463		ip6h = ipv6_hdr(skb);
464
465		/* ip6h->version = 6; ip6h->priority = 0; */
466		put_unaligned(0x60, (unsigned char *)ip6h);
467		ip6h->flow_lbl[0] = 0;
468		ip6h->flow_lbl[1] = 0;
469		ip6h->flow_lbl[2] = 0;
470
471		ip6h->payload_len = htons(sizeof(struct udphdr) + len);
472		ip6h->nexthdr = IPPROTO_UDP;
473		ip6h->hop_limit = 32;
474		ip6h->saddr = np->local_ip.in6;
475		ip6h->daddr = np->remote_ip.in6;
476
477		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
478		skb_reset_mac_header(skb);
479		skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
480	} else {
481		udph->check = 0;
482		udph->check = csum_tcpudp_magic(np->local_ip.ip,
483						np->remote_ip.ip,
484						udp_len, IPPROTO_UDP,
485						csum_partial(udph, udp_len, 0));
486		if (udph->check == 0)
487			udph->check = CSUM_MANGLED_0;
488
489		skb_push(skb, sizeof(*iph));
490		skb_reset_network_header(skb);
491		iph = ip_hdr(skb);
492
493		/* iph->version = 4; iph->ihl = 5; */
494		put_unaligned(0x45, (unsigned char *)iph);
495		iph->tos      = 0;
496		put_unaligned(htons(ip_len), &(iph->tot_len));
497		iph->id       = htons(atomic_inc_return(&ip_ident));
498		iph->frag_off = 0;
499		iph->ttl      = 64;
500		iph->protocol = IPPROTO_UDP;
501		iph->check    = 0;
502		put_unaligned(np->local_ip.ip, &(iph->saddr));
503		put_unaligned(np->remote_ip.ip, &(iph->daddr));
504		iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
505
506		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
507		skb_reset_mac_header(skb);
508		skb->protocol = eth->h_proto = htons(ETH_P_IP);
509	}
510
511	memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
512	memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
513
514	skb->dev = np->dev;
515
516	netpoll_send_skb(np, skb);
517}
518EXPORT_SYMBOL(netpoll_send_udp);
519
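/* Answer ARP requests (IPv4) and neighbour solicitations (IPv6) that are
 * addressed to a registered netpoll client.  This runs while netpoll has
 * the receive path trapped, so the normal stack cannot reply for us.
 */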
520static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
521{
522	int size, type = ARPOP_REPLY;
523	__be32 sip, tip;
524	unsigned char *sha;
525	struct sk_buff *send_skb;
526	struct netpoll *np, *tmp;
527	unsigned long flags;
528	int hlen, tlen;
529	int hits = 0, proto;
530
531	if (list_empty(&npinfo->rx_np))
532		return;
533
534	/* Before checking the packet, we do some early
535	   inspection to see whether it is interesting at all */
536	spin_lock_irqsave(&npinfo->rx_lock, flags);
537	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
538		if (np->dev == skb->dev)
539			hits++;
540	}
541	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
542
543	/* No netpoll struct is using this dev */
544	if (!hits)
545		return;
546
547	proto = ntohs(eth_hdr(skb)->h_proto);
548	if (proto == ETH_P_IP) {
549		struct arphdr *arp;
550		unsigned char *arp_ptr;
551		/* No arp on this interface */
552		if (skb->dev->flags & IFF_NOARP)
553			return;
554
555		if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
556			return;
557
558		skb_reset_network_header(skb);
559		skb_reset_transport_header(skb);
560		arp = arp_hdr(skb);
561
562		if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
563		     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
564		    arp->ar_pro != htons(ETH_P_IP) ||
565		    arp->ar_op != htons(ARPOP_REQUEST))
566			return;
567
568		arp_ptr = (unsigned char *)(arp+1);
569		/* save the location of the src hw addr */
570		sha = arp_ptr;
571		arp_ptr += skb->dev->addr_len;
572		memcpy(&sip, arp_ptr, 4);
573		arp_ptr += 4;
574		/* If we actually cared about dst hw addr,
575		   it would get copied here */
576		arp_ptr += skb->dev->addr_len;
577		memcpy(&tip, arp_ptr, 4);
578
579		/* Should we ignore arp? */
580		if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
581			return;
582
583		size = arp_hdr_len(skb->dev);
584
585		spin_lock_irqsave(&npinfo->rx_lock, flags);
586		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
587			if (tip != np->local_ip.ip)
588				continue;
589
590			hlen = LL_RESERVED_SPACE(np->dev);
591			tlen = np->dev->needed_tailroom;
592			send_skb = find_skb(np, size + hlen + tlen, hlen);
593			if (!send_skb)
594				continue;
595
596			skb_reset_network_header(send_skb);
597			arp = (struct arphdr *) skb_put(send_skb, size);
598			send_skb->dev = skb->dev;
599			send_skb->protocol = htons(ETH_P_ARP);
600
601			/* Fill the device header for the ARP frame */
602			if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP,
603					    sha, np->dev->dev_addr,
604					    send_skb->len) < 0) {
605				kfree_skb(send_skb);
606				continue;
607			}
608
609			/*
610			 * Fill out the arp protocol part.
611			 *
612			 * we only support ethernet device type,
613			 * which (according to RFC 1390) should
614			 * always equal 1 (Ethernet).
615			 */
616
617			arp->ar_hrd = htons(np->dev->type);
618			arp->ar_pro = htons(ETH_P_IP);
619			arp->ar_hln = np->dev->addr_len;
620			arp->ar_pln = 4;
621			arp->ar_op = htons(type);
622
623			arp_ptr = (unsigned char *)(arp + 1);
624			memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
625			arp_ptr += np->dev->addr_len;
626			memcpy(arp_ptr, &tip, 4);
627			arp_ptr += 4;
628			memcpy(arp_ptr, sha, np->dev->addr_len);
629			arp_ptr += np->dev->addr_len;
630			memcpy(arp_ptr, &sip, 4);
631
632			netpoll_send_skb(np, send_skb);
633
634			/* If there are several rx_hooks for the same address,
635			   we're fine with sending a single reply */
636			break;
637		}
638		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
639	} else if (proto == ETH_P_IPV6) {
640#if IS_ENABLED(CONFIG_IPV6)
641		struct nd_msg *msg;
642		u8 *lladdr = NULL;
643		struct ipv6hdr *hdr;
644		struct icmp6hdr *icmp6h;
645		const struct in6_addr *saddr;
646		const struct in6_addr *daddr;
647		struct inet6_dev *in6_dev = NULL;
648		struct in6_addr *target;
649
650		in6_dev = in6_dev_get(skb->dev);
651		if (!in6_dev || !in6_dev->cnf.accept_ra)
652			return;
653
654		if (!pskb_may_pull(skb, skb->len))
655			return;
656
657		msg = (struct nd_msg *)skb_transport_header(skb);
658
659		__skb_push(skb, skb->data - skb_transport_header(skb));
660
661		if (ipv6_hdr(skb)->hop_limit != 255)
662			return;
663		if (msg->icmph.icmp6_code != 0)
664			return;
665		if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
666			return;
667
668		saddr = &ipv6_hdr(skb)->saddr;
669		daddr = &ipv6_hdr(skb)->daddr;
670
671		size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
672
673		spin_lock_irqsave(&npinfo->rx_lock, flags);
674		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
675			if (!ipv6_addr_equal(daddr, &np->local_ip.in6))
676				continue;
677
678			hlen = LL_RESERVED_SPACE(np->dev);
679			tlen = np->dev->needed_tailroom;
680			send_skb = find_skb(np, size + hlen + tlen, hlen);
681			if (!send_skb)
682				continue;
683
684			send_skb->protocol = htons(ETH_P_IPV6);
685			send_skb->dev = skb->dev;
686
687			skb_reset_network_header(send_skb);
688			skb_put(send_skb, sizeof(struct ipv6hdr));
689			hdr = ipv6_hdr(send_skb);
690
691			*(__be32*)hdr = htonl(0x60000000);
692
693			hdr->payload_len = htons(size);
694			hdr->nexthdr = IPPROTO_ICMPV6;
695			hdr->hop_limit = 255;
696			hdr->saddr = *saddr;
697			hdr->daddr = *daddr;
698
699			send_skb->transport_header = send_skb->tail;
700			skb_put(send_skb, size);
701
702			icmp6h = (struct icmp6hdr *)skb_transport_header(send_skb);
703			icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
704			icmp6h->icmp6_router = 0;
705			icmp6h->icmp6_solicited = 1;
706			target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr));
707			*target = msg->target;
708			icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
709							      IPPROTO_ICMPV6,
710							      csum_partial(icmp6h,
711									   size, 0));
712
713			if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6,
714					    lladdr, np->dev->dev_addr,
715					    send_skb->len) < 0) {
716				kfree_skb(send_skb);
717				continue;
718			}
719
720			netpoll_send_skb(np, send_skb);
721
722			/* If there are several rx_hooks for the same address,
723			   we're fine with sending a single reply */
724			break;
725		}
726		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
727#endif
728	}
729}
730
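/* Does this skb look like an IPv6 neighbour solicitation? */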
731static bool pkt_is_ns(struct sk_buff *skb)
732{
733	struct nd_msg *msg;
734	struct ipv6hdr *hdr;
735
736	if (skb->protocol != htons(ETH_P_IPV6))
737		return false;
738	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
739		return false;
740
741	msg = (struct nd_msg *)skb_transport_header(skb);
742	__skb_push(skb, skb->data - skb_transport_header(skb));
743	hdr = ipv6_hdr(skb);
744
745	if (hdr->nexthdr != IPPROTO_ICMPV6)
746		return false;
747	if (hdr->hop_limit != 255)
748		return false;
749	if (msg->icmph.icmp6_code != 0)
750		return false;
751	if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
752		return false;
753
754	return true;
755}
756
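/* Receive-path hook: hand UDP packets that match a registered netpoll
 * client to its rx_hook().  Returns 1 if the packet was consumed by
 * netpoll (which, while trapped, is every packet we look at), 0 if the
 * normal stack should process it.
 */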
757int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
758{
759	int proto, len, ulen;
760	int hits = 0;
761	const struct iphdr *iph;
762	struct udphdr *uh;
763	struct netpoll *np, *tmp;
764
765	if (list_empty(&npinfo->rx_np))
766		goto out;
767
768	if (skb->dev->type != ARPHRD_ETHER)
769		goto out;
770
771	/* check if netpoll clients need ARP or IPv6 neighbour discovery */
772	if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) {
773		skb_queue_tail(&npinfo->neigh_tx, skb);
774		return 1;
775	} else if (pkt_is_ns(skb) && atomic_read(&trapped)) {
776		skb_queue_tail(&npinfo->neigh_tx, skb);
777		return 1;
778	}
779
780	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
781		skb = vlan_untag(skb);
782		if (unlikely(!skb))
783			goto out;
784	}
785
786	proto = ntohs(eth_hdr(skb)->h_proto);
787	if (proto != ETH_P_IP && proto != ETH_P_IPV6)
788		goto out;
789	if (skb->pkt_type == PACKET_OTHERHOST)
790		goto out;
791	if (skb_shared(skb))
792		goto out;
793
794	if (proto == ETH_P_IP) {
795		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
796			goto out;
797		iph = (struct iphdr *)skb->data;
798		if (iph->ihl < 5 || iph->version != 4)
799			goto out;
800		if (!pskb_may_pull(skb, iph->ihl*4))
801			goto out;
802		iph = (struct iphdr *)skb->data;
803		if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
804			goto out;
805
806		len = ntohs(iph->tot_len);
807		if (skb->len < len || len < iph->ihl*4)
808			goto out;
809
810		/*
811		 * Our transport medium may have padded the buffer out.
812		 * Now we trim to the true length of the frame.
813		 */
814		if (pskb_trim_rcsum(skb, len))
815			goto out;
816
817		iph = (struct iphdr *)skb->data;
818		if (iph->protocol != IPPROTO_UDP)
819			goto out;
820
821		len -= iph->ihl*4;
822		uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
823		ulen = ntohs(uh->len);
824
825		if (ulen != len)
826			goto out;
827		if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
828			goto out;
829		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
830			if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
831				continue;
832			if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
833				continue;
834			if (np->local_port && np->local_port != ntohs(uh->dest))
835				continue;
836
837			np->rx_hook(np, ntohs(uh->source),
838				       (char *)(uh+1),
839				       ulen - sizeof(struct udphdr));
840			hits++;
841		}
842	} else {
843#if IS_ENABLED(CONFIG_IPV6)
844		const struct ipv6hdr *ip6h;
845
846		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
847			goto out;
848		ip6h = (struct ipv6hdr *)skb->data;
849		if (ip6h->version != 6)
850			goto out;
851		len = ntohs(ip6h->payload_len);
852		if (!len)
853			goto out;
854		if (len + sizeof(struct ipv6hdr) > skb->len)
855			goto out;
856		if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr)))
857			goto out;
858		ip6h = ipv6_hdr(skb);
859		if (!pskb_may_pull(skb, sizeof(struct udphdr)))
860			goto out;
861		uh = udp_hdr(skb);
862		ulen = ntohs(uh->len);
863		if (ulen != skb->len)
864			goto out;
865		if (udp6_csum_init(skb, uh, IPPROTO_UDP))
866			goto out;
867		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
868			if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr))
869				continue;
870			if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr))
871				continue;
872			if (np->local_port && np->local_port != ntohs(uh->dest))
873				continue;
874
875			np->rx_hook(np, ntohs(uh->source),
876				       (char *)(uh+1),
877				       ulen - sizeof(struct udphdr));
878			hits++;
879		}
880#endif
881	}
882
883	if (!hits)
884		goto out;
885
886	kfree_skb(skb);
887	return 1;
888
889out:
890	if (atomic_read(&trapped)) {
891		kfree_skb(skb);
892		return 1;
893	}
894
895	return 0;
896}
897
898void netpoll_print_options(struct netpoll *np)
899{
900	np_info(np, "local port %d\n", np->local_port);
901	if (np->ipv6)
902		np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
903	else
904		np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
905	np_info(np, "interface '%s'\n", np->dev_name);
906	np_info(np, "remote port %d\n", np->remote_port);
907	if (np->ipv6)
908		np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
909	else
910		np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
911	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
912}
913EXPORT_SYMBOL(netpoll_print_options);
914
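/* Returns 0 for an IPv4 address, 1 for IPv6, and -1 if the string parses
 * as neither (or as IPv6 on a kernel built without IPv6 support).
 */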
915static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
916{
917	const char *end;
918
919	if (!strchr(str, ':') &&
920	    in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
921		if (!*end)
922			return 0;
923	}
924	if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
925#if IS_ENABLED(CONFIG_IPV6)
926		if (!*end)
927			return 1;
928#else
929		return -1;
930#endif
931	}
932	return -1;
933}
934
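/* Parse a netconsole-style configuration string:
 *
 *	[src_port]@[src_ip]/[dev],[dst_port]@<dst_ip>/[dst_mac]
 *
 * for example (purely illustrative values):
 *
 *	6665@192.168.0.2/eth0,6666@192.168.0.1/00:11:22:33:44:55
 *
 * Fields that are left out keep whatever defaults the caller put in @np.
 */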
935int netpoll_parse_options(struct netpoll *np, char *opt)
936{
937	char *cur = opt, *delim;
938	int ipv6;
939
940	if (*cur != '@') {
941		if ((delim = strchr(cur, '@')) == NULL)
942			goto parse_failed;
943		*delim = 0;
944		if (kstrtou16(cur, 10, &np->local_port))
945			goto parse_failed;
946		cur = delim;
947	}
948	cur++;
949
950	if (*cur != '/') {
951		if ((delim = strchr(cur, '/')) == NULL)
952			goto parse_failed;
953		*delim = 0;
954		ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
955		if (ipv6 < 0)
956			goto parse_failed;
957		else
958			np->ipv6 = (bool)ipv6;
959		cur = delim;
960	}
961	cur++;
962
963	if (*cur != ',') {
964		/* parse out dev name */
965		if ((delim = strchr(cur, ',')) == NULL)
966			goto parse_failed;
967		*delim = 0;
968		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
969		cur = delim;
970	}
971	cur++;
972
973	if (*cur != '@') {
974		/* dst port */
975		if ((delim = strchr(cur, '@')) == NULL)
976			goto parse_failed;
977		*delim = 0;
978		if (*cur == ' ' || *cur == '\t')
979			np_info(np, "warning: whitespace is not allowed\n");
980		if (kstrtou16(cur, 10, &np->remote_port))
981			goto parse_failed;
982		cur = delim;
983	}
984	cur++;
985
986	/* dst ip */
987	if ((delim = strchr(cur, '/')) == NULL)
988		goto parse_failed;
989	*delim = 0;
990	ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
991	if (ipv6 < 0)
992		goto parse_failed;
993	else if (np->ipv6 != (bool)ipv6)
994		goto parse_failed;
995	else
996		np->ipv6 = (bool)ipv6;
997	cur = delim + 1;
998
999	if (*cur != 0) {
1000		/* MAC address */
1001		if (!mac_pton(cur, np->remote_mac))
1002			goto parse_failed;
1003	}
1004
1005	netpoll_print_options(np);
1006
1007	return 0;
1008
1009 parse_failed:
1010	np_info(np, "couldn't parse config at '%s'!\n", cur);
1011	return -1;
1012}
1013EXPORT_SYMBOL(netpoll_parse_options);
1014
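/* Lower-level half of netpoll_setup(): attach @np to @ndev and allocate or
 * share the per-device netpoll_info.  Callers are expected to hold RTNL,
 * as netpoll_setup() does.
 */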
1015int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
1016{
1017	struct netpoll_info *npinfo;
1018	const struct net_device_ops *ops;
1019	unsigned long flags;
1020	int err;
1021
1022	np->dev = ndev;
1023	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
1024	INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);
1025
1026	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
1027	    !ndev->netdev_ops->ndo_poll_controller) {
1028		np_err(np, "%s doesn't support polling, aborting\n",
1029		       np->dev_name);
1030		err = -ENOTSUPP;
1031		goto out;
1032	}
1033
1034	if (!ndev->npinfo) {
1035		npinfo = kmalloc(sizeof(*npinfo), gfp);
1036		if (!npinfo) {
1037			err = -ENOMEM;
1038			goto out;
1039		}
1040
1041		npinfo->rx_flags = 0;
1042		INIT_LIST_HEAD(&npinfo->rx_np);
1043
1044		spin_lock_init(&npinfo->rx_lock);
1045		mutex_init(&npinfo->dev_lock);
1046		skb_queue_head_init(&npinfo->neigh_tx);
1047		skb_queue_head_init(&npinfo->txq);
1048		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
1049
1050		atomic_set(&npinfo->refcnt, 1);
1051
1052		ops = np->dev->netdev_ops;
1053		if (ops->ndo_netpoll_setup) {
1054			err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
1055			if (err)
1056				goto free_npinfo;
1057		}
1058	} else {
1059		npinfo = ndev->npinfo;
1060		atomic_inc(&npinfo->refcnt);
1061	}
1062
1063	npinfo->netpoll = np;
1064
1065	if (np->rx_hook) {
1066		spin_lock_irqsave(&npinfo->rx_lock, flags);
1067		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
1068		list_add_tail(&np->rx, &npinfo->rx_np);
1069		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
1070	}
1071
1072	/* last thing to do is link it to the net device structure */
1073	rcu_assign_pointer(ndev->npinfo, npinfo);
1074
1075	return 0;
1076
1077free_npinfo:
1078	kfree(npinfo);
1079out:
1080	return err;
1081}
1082EXPORT_SYMBOL_GPL(__netpoll_setup);
1083
1084int netpoll_setup(struct netpoll *np)
1085{
1086	struct net_device *ndev = NULL;
1087	struct in_device *in_dev;
1088	int err;
1089
1090	rtnl_lock();
1091	if (np->dev_name) {
1092		struct net *net = current->nsproxy->net_ns;
1093		ndev = __dev_get_by_name(net, np->dev_name);
1094	}
1095	if (!ndev) {
1096		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
1097		err = -ENODEV;
1098		goto unlock;
1099	}
1100	dev_hold(ndev);
1101
1102	if (netdev_master_upper_dev_get(ndev)) {
1103		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
1104		err = -EBUSY;
1105		goto put;
1106	}
1107
1108	if (!netif_running(ndev)) {
1109		unsigned long atmost, atleast;
1110
1111		np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
1112
1113		err = dev_open(ndev);
1114
1115		if (err) {
1116			np_err(np, "failed to open %s\n", ndev->name);
1117			goto put;
1118		}
1119
1120		rtnl_unlock();
1121		atleast = jiffies + HZ/10;
1122		atmost = jiffies + carrier_timeout * HZ;
1123		while (!netif_carrier_ok(ndev)) {
1124			if (time_after(jiffies, atmost)) {
1125				np_notice(np, "timeout waiting for carrier\n");
1126				break;
1127			}
1128			msleep(1);
1129		}
1130
1131		/* If carrier appears to come up instantly, we don't
1132		 * trust it and pause so that we don't pump all our
1133		 * queued console messages into the bitbucket.
1134		 */
1135
1136		if (time_before(jiffies, atleast)) {
1137			np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
1138			msleep(4000);
1139		}
1140		rtnl_lock();
1141	}
1142
1143	if (!np->local_ip.ip) {
1144		if (!np->ipv6) {
1145			in_dev = __in_dev_get_rtnl(ndev);
1146
1147			if (!in_dev || !in_dev->ifa_list) {
1148				np_err(np, "no IP address for %s, aborting\n",
1149				       np->dev_name);
1150				err = -EDESTADDRREQ;
1151				goto put;
1152			}
1153
1154			np->local_ip.ip = in_dev->ifa_list->ifa_local;
1155			np_info(np, "local IP %pI4\n", &np->local_ip.ip);
1156		} else {
1157#if IS_ENABLED(CONFIG_IPV6)
1158			struct inet6_dev *idev;
1159
1160			err = -EDESTADDRREQ;
1161			idev = __in6_dev_get(ndev);
1162			if (idev) {
1163				struct inet6_ifaddr *ifp;
1164
1165				read_lock_bh(&idev->lock);
1166				list_for_each_entry(ifp, &idev->addr_list, if_list) {
1167					if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
1168						continue;
1169					np->local_ip.in6 = ifp->addr;
1170					err = 0;
1171					break;
1172				}
1173				read_unlock_bh(&idev->lock);
1174			}
1175			if (err) {
1176				np_err(np, "no IPv6 address for %s, aborting\n",
1177				       np->dev_name);
1178				goto put;
1179			} else
1180				np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
1181#else
1182			np_err(np, "IPv6 is not supported on %s, aborting\n",
1183			       np->dev_name);
1184			err = -EINVAL;
1185			goto put;
1186#endif
1187		}
1188	}
1189
1190	/* fill up the skb queue */
1191	refill_skbs();
1192
1193	err = __netpoll_setup(np, ndev, GFP_KERNEL);
1194	if (err)
1195		goto put;
1196
1197	rtnl_unlock();
1198	return 0;
1199
1200put:
1201	dev_put(ndev);
1202unlock:
1203	rtnl_unlock();
1204	return err;
1205}
1206EXPORT_SYMBOL(netpoll_setup);
1207
1208static int __init netpoll_init(void)
1209{
1210	skb_queue_head_init(&skb_pool);
1211	init_srcu_struct(&netpoll_srcu);
1212	return 0;
1213}
1214core_initcall(netpoll_init);
1215
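/* Final teardown, run as an RCU-bh callback once the last reference to the
 * netpoll_info is dropped.  Because this runs in softirq context it cannot
 * use cancel_delayed_work_sync(), hence the purge/cancel/purge/cancel dance.
 */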
1216static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
1217{
1218	struct netpoll_info *npinfo =
1219			container_of(rcu_head, struct netpoll_info, rcu);
1220
1221	skb_queue_purge(&npinfo->neigh_tx);
1222	skb_queue_purge(&npinfo->txq);
1223
1224	/* we can't call cancel_delayed_work_sync here, as we are in softirq */
1225	cancel_delayed_work(&npinfo->tx_work);
1226
1227	/* clean up after the last, unfinished work */
1228	__skb_queue_purge(&npinfo->txq);
1229	/* now cancel it again */
1230	cancel_delayed_work(&npinfo->tx_work);
1231	kfree(npinfo);
1232}
1233
1234void __netpoll_cleanup(struct netpoll *np)
1235{
1236	struct netpoll_info *npinfo;
1237	unsigned long flags;
1238
1239	npinfo = np->dev->npinfo;
1240	if (!npinfo)
1241		return;
1242
1243	if (!list_empty(&npinfo->rx_np)) {
1244		spin_lock_irqsave(&npinfo->rx_lock, flags);
1245		list_del(&np->rx);
1246		if (list_empty(&npinfo->rx_np))
1247			npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
1248		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
1249	}
1250
1251	synchronize_srcu(&netpoll_srcu);
1252
1253	if (atomic_dec_and_test(&npinfo->refcnt)) {
1254		const struct net_device_ops *ops;
1255
1256		ops = np->dev->netdev_ops;
1257		if (ops->ndo_netpoll_cleanup)
1258			ops->ndo_netpoll_cleanup(np->dev);
1259
1260		rcu_assign_pointer(np->dev->npinfo, NULL);
1261		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
1262	}
1263}
1264EXPORT_SYMBOL_GPL(__netpoll_cleanup);
1265
1266static void netpoll_async_cleanup(struct work_struct *work)
1267{
1268	struct netpoll *np = container_of(work, struct netpoll, cleanup_work);
1269
1270	rtnl_lock();
1271	__netpoll_cleanup(np);
1272	rtnl_unlock();
1273	kfree(np);
1274}
1275
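/* Hand __netpoll_cleanup() and the final kfree() of @np to a work item, for
 * callers that cannot block on the rtnl lock or on the cleanup itself.
 */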
1276void __netpoll_free_async(struct netpoll *np)
1277{
1278	schedule_work(&np->cleanup_work);
1279}
1280EXPORT_SYMBOL_GPL(__netpoll_free_async);
1281
1282void netpoll_cleanup(struct netpoll *np)
1283{
1284	if (!np->dev)
1285		return;
1286
1287	rtnl_lock();
1288	__netpoll_cleanup(np);
1289	rtnl_unlock();
1290
1291	dev_put(np->dev);
1292	np->dev = NULL;
1293}
1294EXPORT_SYMBOL(netpoll_cleanup);
1295
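/* While the trap count is non-zero, netpoll consumes every packet it sees
 * (see __netpoll_rx()) instead of passing it to the normal stack, and
 * answers ARP and neighbour solicitations itself via the neigh_tx queue.
 */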
1296int netpoll_trap(void)
1297{
1298	return atomic_read(&trapped);
1299}
1300EXPORT_SYMBOL(netpoll_trap);
1301
1302void netpoll_set_trap(int trap)
1303{
1304	if (trap)
1305		atomic_inc(&trapped);
1306	else
1307		atomic_dec(&trapped);
1308}
1309EXPORT_SYMBOL(netpoll_set_trap);
1310