netpoll.c revision faeed828f9607923e9dc22182e819908e95c8852
1/*
2 * Common framework for low-level network console, dump, and debugger code
3 *
4 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
5 *
6 * based on the netconsole code from:
7 *
8 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
9 * Copyright (C) 2002  Red Hat, Inc.
10 */
11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
14#include <linux/moduleparam.h>
15#include <linux/netdevice.h>
16#include <linux/etherdevice.h>
17#include <linux/string.h>
18#include <linux/if_arp.h>
19#include <linux/inetdevice.h>
20#include <linux/inet.h>
21#include <linux/interrupt.h>
22#include <linux/netpoll.h>
23#include <linux/sched.h>
24#include <linux/delay.h>
25#include <linux/rcupdate.h>
26#include <linux/workqueue.h>
27#include <linux/slab.h>
28#include <linux/export.h>
29#include <linux/if_vlan.h>
30#include <net/tcp.h>
31#include <net/udp.h>
32#include <net/addrconf.h>
33#include <net/ndisc.h>
34#include <net/ip6_checksum.h>
35#include <asm/unaligned.h>
36#include <trace/events/napi.h>
37
38/*
39 * We maintain a small pool of fully-sized skbs, to make sure the
40 * message gets out even in extreme OOM situations.
41 */
42
#define MAX_UDP_CHUNK 1460
/* refill_skbs() tops skb_pool up to this many buffers */
#define MAX_SKBS 32

/* emergency skb reserve: filled by refill_skbs(), drained by find_skb() */
static struct sk_buff_head skb_pool;

/*
 * Trap counter: raised/lowered by netpoll_set_trap() and around ->poll()
 * in poll_one_napi(); read by __netpoll_rx() and netpoll_trap().
 */
static atomic_t trapped;

/* udelay() interval between transmit retries in netpoll_send_skb_on_dev() */
#define USEC_PER_POLL	50
/* bits kept in netpoll_info->rx_flags */
#define NETPOLL_RX_ENABLED  1
#define NETPOLL_RX_DROP     2

/* allocation size of each pool skb: Ethernet + IPv4 + UDP headers + payload */
#define MAX_SKB_SIZE							\
	(sizeof(struct ethhdr) +					\
	 sizeof(struct iphdr) +						\
	 sizeof(struct udphdr) +					\
	 MAX_UDP_CHUNK)

static void zap_completion_queue(void);
static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);

/* multiplied by HZ in netpoll_setup() to bound the wait for carrier */
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

/* logging helpers that prefix every message with the netpoll instance name */
#define np_info(np, fmt, ...)				\
	pr_info("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_err(np, fmt, ...)				\
	pr_err("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_notice(np, fmt, ...)				\
	pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)
72
73static void queue_process(struct work_struct *work)
74{
75	struct netpoll_info *npinfo =
76		container_of(work, struct netpoll_info, tx_work.work);
77	struct sk_buff *skb;
78	unsigned long flags;
79
80	while ((skb = skb_dequeue(&npinfo->txq))) {
81		struct net_device *dev = skb->dev;
82		const struct net_device_ops *ops = dev->netdev_ops;
83		struct netdev_queue *txq;
84
85		if (!netif_device_present(dev) || !netif_running(dev)) {
86			__kfree_skb(skb);
87			continue;
88		}
89
90		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
91
92		local_irq_save(flags);
93		__netif_tx_lock(txq, smp_processor_id());
94		if (netif_xmit_frozen_or_stopped(txq) ||
95		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
96			skb_queue_head(&npinfo->txq, skb);
97			__netif_tx_unlock(txq);
98			local_irq_restore(flags);
99
100			schedule_delayed_work(&npinfo->tx_work, HZ/10);
101			return;
102		}
103		__netif_tx_unlock(txq);
104		local_irq_restore(flags);
105	}
106}
107
108static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
109			    unsigned short ulen, __be32 saddr, __be32 daddr)
110{
111	__wsum psum;
112
113	if (uh->check == 0 || skb_csum_unnecessary(skb))
114		return 0;
115
116	psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
117
118	if (skb->ip_summed == CHECKSUM_COMPLETE &&
119	    !csum_fold(csum_add(psum, skb->csum)))
120		return 0;
121
122	skb->csum = psum;
123
124	return __skb_checksum_complete(skb);
125}
126
127/*
128 * Check whether delayed processing was scheduled for our NIC. If so,
129 * we attempt to grab the poll lock and use ->poll() to pump the card.
130 * If this fails, either we've recursed in ->poll() or it's already
131 * running on another CPU.
132 *
133 * Note: we don't mask interrupts with this lock because we're using
134 * trylock here and interrupts are already disabled in the softirq
135 * case. Further, we test the poll_owner to avoid recursion on UP
136 * systems where the lock doesn't exist.
137 *
138 * In cases where there is bi-directional communications, reading only
139 * one message at a time can lead to packets being dropped by the
140 * network adapter, forcing superfluous retries and possibly timeouts.
141 * Thus, we set our budget to greater than 1.
142 */
/*
 * Run one NAPI context's ->poll() on behalf of netpoll.
 *
 * Returns @budget unchanged when the context is not scheduled, otherwise
 * @budget minus the amount of work ->poll() reported.
 */
static int poll_one_napi(struct netpoll_info *npinfo,
			 struct napi_struct *napi, int budget)
{
	int work;

	/* net_rx_action's ->poll() invocations and ours are
	 * synchronized by this test which is only made while
	 * holding the napi->poll_lock.
	 */
	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
		return budget;

	/* mark the receive path as trapped/dropping while we poll */
	npinfo->rx_flags |= NETPOLL_RX_DROP;
	atomic_inc(&trapped);
	set_bit(NAPI_STATE_NPSVC, &napi->state);

	work = napi->poll(napi, budget);
	trace_napi_poll(napi);

	/* undo the markers in reverse order */
	clear_bit(NAPI_STATE_NPSVC, &napi->state);
	atomic_dec(&trapped);
	npinfo->rx_flags &= ~NETPOLL_RX_DROP;

	return budget - work;
}
168
169static void poll_napi(struct net_device *dev)
170{
171	struct napi_struct *napi;
172	int budget = 16;
173
174	list_for_each_entry(napi, &dev->napi_list, dev_list) {
175		if (napi->poll_owner != smp_processor_id() &&
176		    spin_trylock(&napi->poll_lock)) {
177			budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
178					       napi, budget);
179			spin_unlock(&napi->poll_lock);
180
181			if (!budget)
182				break;
183		}
184	}
185}
186
187static void service_neigh_queue(struct netpoll_info *npi)
188{
189	if (npi) {
190		struct sk_buff *skb;
191
192		while ((skb = skb_dequeue(&npi->neigh_tx)))
193			netpoll_neigh_reply(skb, npi);
194	}
195}
196
197static void netpoll_poll_dev(struct net_device *dev)
198{
199	const struct net_device_ops *ops;
200	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
201
202	if (!dev || !netif_running(dev))
203		return;
204
205	ops = dev->netdev_ops;
206	if (!ops->ndo_poll_controller)
207		return;
208
209	/* Process pending work on NIC */
210	ops->ndo_poll_controller(dev);
211
212	poll_napi(dev);
213
214	if (dev->flags & IFF_SLAVE) {
215		if (ni) {
216			struct net_device *bond_dev;
217			struct sk_buff *skb;
218			struct netpoll_info *bond_ni;
219
220			bond_dev = netdev_master_upper_dev_get_rcu(dev);
221			bond_ni = rcu_dereference_bh(bond_dev->npinfo);
222			while ((skb = skb_dequeue(&ni->neigh_tx))) {
223				skb->dev = bond_dev;
224				skb_queue_tail(&bond_ni->neigh_tx, skb);
225			}
226		}
227	}
228
229	service_neigh_queue(ni);
230
231	zap_completion_queue();
232}
233
234static void refill_skbs(void)
235{
236	struct sk_buff *skb;
237	unsigned long flags;
238
239	spin_lock_irqsave(&skb_pool.lock, flags);
240	while (skb_pool.qlen < MAX_SKBS) {
241		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
242		if (!skb)
243			break;
244
245		__skb_queue_tail(&skb_pool, skb);
246	}
247	spin_unlock_irqrestore(&skb_pool.lock, flags);
248}
249
250static void zap_completion_queue(void)
251{
252	unsigned long flags;
253	struct softnet_data *sd = &get_cpu_var(softnet_data);
254
255	if (sd->completion_queue) {
256		struct sk_buff *clist;
257
258		local_irq_save(flags);
259		clist = sd->completion_queue;
260		sd->completion_queue = NULL;
261		local_irq_restore(flags);
262
263		while (clist != NULL) {
264			struct sk_buff *skb = clist;
265			clist = clist->next;
266			if (skb->destructor) {
267				atomic_inc(&skb->users);
268				dev_kfree_skb_any(skb); /* put this one back */
269			} else {
270				__kfree_skb(skb);
271			}
272		}
273	}
274
275	put_cpu_var(softnet_data);
276}
277
278static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
279{
280	int count = 0;
281	struct sk_buff *skb;
282
283	zap_completion_queue();
284	refill_skbs();
285repeat:
286
287	skb = alloc_skb(len, GFP_ATOMIC);
288	if (!skb)
289		skb = skb_dequeue(&skb_pool);
290
291	if (!skb) {
292		if (++count < 10) {
293			netpoll_poll_dev(np->dev);
294			goto repeat;
295		}
296		return NULL;
297	}
298
299	atomic_set(&skb->users, 1);
300	skb_reserve(skb, reserve);
301	return skb;
302}
303
304static int netpoll_owner_active(struct net_device *dev)
305{
306	struct napi_struct *napi;
307
308	list_for_each_entry(napi, &dev->napi_list, dev_list) {
309		if (napi->poll_owner == smp_processor_id())
310			return 1;
311	}
312	return 0;
313}
314
315/* call with IRQ disabled */
316void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
317			     struct net_device *dev)
318{
319	int status = NETDEV_TX_BUSY;
320	unsigned long tries;
321	const struct net_device_ops *ops = dev->netdev_ops;
322	/* It is up to the caller to keep npinfo alive. */
323	struct netpoll_info *npinfo;
324
325	WARN_ON_ONCE(!irqs_disabled());
326
327	npinfo = rcu_dereference_bh(np->dev->npinfo);
328	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
329		__kfree_skb(skb);
330		return;
331	}
332
333	/* don't get messages out of order, and no recursion */
334	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
335		struct netdev_queue *txq;
336
337		txq = netdev_pick_tx(dev, skb);
338
339		/* try until next clock tick */
340		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
341		     tries > 0; --tries) {
342			if (__netif_tx_trylock(txq)) {
343				if (!netif_xmit_stopped(txq)) {
344					if (vlan_tx_tag_present(skb) &&
345					    !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) {
346						skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
347						if (unlikely(!skb))
348							break;
349						skb->vlan_tci = 0;
350					}
351
352					status = ops->ndo_start_xmit(skb, dev);
353					if (status == NETDEV_TX_OK)
354						txq_trans_update(txq);
355				}
356				__netif_tx_unlock(txq);
357
358				if (status == NETDEV_TX_OK)
359					break;
360
361			}
362
363			/* tickle device maybe there is some cleanup */
364			netpoll_poll_dev(np->dev);
365
366			udelay(USEC_PER_POLL);
367		}
368
369		WARN_ONCE(!irqs_disabled(),
370			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
371			dev->name, ops->ndo_start_xmit);
372
373	}
374
375	if (status != NETDEV_TX_OK) {
376		skb_queue_tail(&npinfo->txq, skb);
377		schedule_delayed_work(&npinfo->tx_work,0);
378	}
379}
380EXPORT_SYMBOL(netpoll_send_skb_on_dev);
381
/*
 * Wrap @len bytes of @msg in UDP, IPv4 or IPv6 (chosen by np->ipv6) and
 * Ethernet headers built from the addresses and ports stored in @np, then
 * pass the frame to netpoll_send_skb().  Returns silently when find_skb()
 * cannot supply a buffer.
 *
 * NOTE(review): @len is not checked against MAX_UDP_CHUNK here; confirm
 * that callers bound it.
 */
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
	int total_len, ip_len, udp_len;
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;
	static atomic_t ip_ident;
	struct ipv6hdr *ip6h;

	udp_len = len + sizeof(*udph);
	if (np->ipv6)
		ip_len = udp_len + sizeof(*ip6h);
	else
		ip_len = udp_len + sizeof(*iph);

	total_len = ip_len + LL_RESERVED_SPACE(np->dev);

	/* reserve everything but the payload as headroom for the headers */
	skb = find_skb(np, total_len + np->dev->needed_tailroom,
		       total_len - len);
	if (!skb)
		return;

	skb_copy_to_linear_data(skb, msg, len);
	skb_put(skb, len);

	/* headers are pushed in front of the payload, innermost first */
	skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);

	if (np->ipv6) {
		udph->check = 0;
		udph->check = csum_ipv6_magic(&np->local_ip.in6,
					      &np->remote_ip.in6,
					      udp_len, IPPROTO_UDP,
					      csum_partial(udph, udp_len, 0));
		/* a computed checksum of zero is sent as CSUM_MANGLED_0 */
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*ip6h));
		skb_reset_network_header(skb);
		ip6h = ipv6_hdr(skb);

		/* ip6h->version = 6; ip6h->priority = 0; */
		put_unaligned(0x60, (unsigned char *)ip6h);
		ip6h->flow_lbl[0] = 0;
		ip6h->flow_lbl[1] = 0;
		ip6h->flow_lbl[2] = 0;

		ip6h->payload_len = htons(sizeof(struct udphdr) + len);
		ip6h->nexthdr = IPPROTO_UDP;
		ip6h->hop_limit = 32;
		ip6h->saddr = np->local_ip.in6;
		ip6h->daddr = np->remote_ip.in6;

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
	} else {
		udph->check = 0;
		udph->check = csum_tcpudp_magic(np->local_ip.ip,
						np->remote_ip.ip,
						udp_len, IPPROTO_UDP,
						csum_partial(udph, udp_len, 0));
		/* a computed checksum of zero is sent as CSUM_MANGLED_0 */
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*iph));
		skb_reset_network_header(skb);
		iph = ip_hdr(skb);

		/* iph->version = 4; iph->ihl = 5; */
		put_unaligned(0x45, (unsigned char *)iph);
		iph->tos      = 0;
		put_unaligned(htons(ip_len), &(iph->tot_len));
		iph->id       = htons(atomic_inc_return(&ip_ident));
		iph->frag_off = 0;
		iph->ttl      = 64;
		iph->protocol = IPPROTO_UDP;
		/* checksum field must be zero while the header is summed */
		iph->check    = 0;
		put_unaligned(np->local_ip.ip, &(iph->saddr));
		put_unaligned(np->remote_ip.ip, &(iph->daddr));
		iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IP);
	}

	memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
	memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);

	skb->dev = np->dev;

	netpoll_send_skb(np, skb);
}
EXPORT_SYMBOL(netpoll_send_udp);
482
483static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
484{
485	int size, type = ARPOP_REPLY;
486	__be32 sip, tip;
487	unsigned char *sha;
488	struct sk_buff *send_skb;
489	struct netpoll *np, *tmp;
490	unsigned long flags;
491	int hlen, tlen;
492	int hits = 0, proto;
493
494	if (list_empty(&npinfo->rx_np))
495		return;
496
497	/* Before checking the packet, we do some early
498	   inspection whether this is interesting at all */
499	spin_lock_irqsave(&npinfo->rx_lock, flags);
500	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
501		if (np->dev == skb->dev)
502			hits++;
503	}
504	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
505
506	/* No netpoll struct is using this dev */
507	if (!hits)
508		return;
509
510	proto = ntohs(eth_hdr(skb)->h_proto);
511	if (proto == ETH_P_IP) {
512		struct arphdr *arp;
513		unsigned char *arp_ptr;
514		/* No arp on this interface */
515		if (skb->dev->flags & IFF_NOARP)
516			return;
517
518		if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
519			return;
520
521		skb_reset_network_header(skb);
522		skb_reset_transport_header(skb);
523		arp = arp_hdr(skb);
524
525		if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
526		     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
527		    arp->ar_pro != htons(ETH_P_IP) ||
528		    arp->ar_op != htons(ARPOP_REQUEST))
529			return;
530
531		arp_ptr = (unsigned char *)(arp+1);
532		/* save the location of the src hw addr */
533		sha = arp_ptr;
534		arp_ptr += skb->dev->addr_len;
535		memcpy(&sip, arp_ptr, 4);
536		arp_ptr += 4;
537		/* If we actually cared about dst hw addr,
538		   it would get copied here */
539		arp_ptr += skb->dev->addr_len;
540		memcpy(&tip, arp_ptr, 4);
541
542		/* Should we ignore arp? */
543		if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
544			return;
545
546		size = arp_hdr_len(skb->dev);
547
548		spin_lock_irqsave(&npinfo->rx_lock, flags);
549		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
550			if (tip != np->local_ip.ip)
551				continue;
552
553			hlen = LL_RESERVED_SPACE(np->dev);
554			tlen = np->dev->needed_tailroom;
555			send_skb = find_skb(np, size + hlen + tlen, hlen);
556			if (!send_skb)
557				continue;
558
559			skb_reset_network_header(send_skb);
560			arp = (struct arphdr *) skb_put(send_skb, size);
561			send_skb->dev = skb->dev;
562			send_skb->protocol = htons(ETH_P_ARP);
563
564			/* Fill the device header for the ARP frame */
565			if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP,
566					    sha, np->dev->dev_addr,
567					    send_skb->len) < 0) {
568				kfree_skb(send_skb);
569				continue;
570			}
571
572			/*
573			 * Fill out the arp protocol part.
574			 *
575			 * we only support ethernet device type,
576			 * which (according to RFC 1390) should
577			 * always equal 1 (Ethernet).
578			 */
579
580			arp->ar_hrd = htons(np->dev->type);
581			arp->ar_pro = htons(ETH_P_IP);
582			arp->ar_hln = np->dev->addr_len;
583			arp->ar_pln = 4;
584			arp->ar_op = htons(type);
585
586			arp_ptr = (unsigned char *)(arp + 1);
587			memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
588			arp_ptr += np->dev->addr_len;
589			memcpy(arp_ptr, &tip, 4);
590			arp_ptr += 4;
591			memcpy(arp_ptr, sha, np->dev->addr_len);
592			arp_ptr += np->dev->addr_len;
593			memcpy(arp_ptr, &sip, 4);
594
595			netpoll_send_skb(np, send_skb);
596
597			/* If there are several rx_hooks for the same address,
598			   we're fine by sending a single reply */
599			break;
600		}
601		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
602	} else if( proto == ETH_P_IPV6) {
603#if IS_ENABLED(CONFIG_IPV6)
604		struct nd_msg *msg;
605		u8 *lladdr = NULL;
606		struct ipv6hdr *hdr;
607		struct icmp6hdr *icmp6h;
608		const struct in6_addr *saddr;
609		const struct in6_addr *daddr;
610		struct inet6_dev *in6_dev = NULL;
611		struct in6_addr *target;
612
613		in6_dev = in6_dev_get(skb->dev);
614		if (!in6_dev || !in6_dev->cnf.accept_ra)
615			return;
616
617		if (!pskb_may_pull(skb, skb->len))
618			return;
619
620		msg = (struct nd_msg *)skb_transport_header(skb);
621
622		__skb_push(skb, skb->data - skb_transport_header(skb));
623
624		if (ipv6_hdr(skb)->hop_limit != 255)
625			return;
626		if (msg->icmph.icmp6_code != 0)
627			return;
628		if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
629			return;
630
631		saddr = &ipv6_hdr(skb)->saddr;
632		daddr = &ipv6_hdr(skb)->daddr;
633
634		size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
635
636		spin_lock_irqsave(&npinfo->rx_lock, flags);
637		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
638			if (!ipv6_addr_equal(daddr, &np->local_ip.in6))
639				continue;
640
641			hlen = LL_RESERVED_SPACE(np->dev);
642			tlen = np->dev->needed_tailroom;
643			send_skb = find_skb(np, size + hlen + tlen, hlen);
644			if (!send_skb)
645				continue;
646
647			send_skb->protocol = htons(ETH_P_IPV6);
648			send_skb->dev = skb->dev;
649
650			skb_reset_network_header(send_skb);
651			skb_put(send_skb, sizeof(struct ipv6hdr));
652			hdr = ipv6_hdr(send_skb);
653
654			*(__be32*)hdr = htonl(0x60000000);
655
656			hdr->payload_len = htons(size);
657			hdr->nexthdr = IPPROTO_ICMPV6;
658			hdr->hop_limit = 255;
659			hdr->saddr = *saddr;
660			hdr->daddr = *daddr;
661
662			send_skb->transport_header = send_skb->tail;
663			skb_put(send_skb, size);
664
665			icmp6h = (struct icmp6hdr *)skb_transport_header(skb);
666			icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
667			icmp6h->icmp6_router = 0;
668			icmp6h->icmp6_solicited = 1;
669			target = (struct in6_addr *)skb_transport_header(send_skb) + sizeof(struct icmp6hdr);
670			*target = msg->target;
671			icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
672							      IPPROTO_ICMPV6,
673							      csum_partial(icmp6h,
674									   size, 0));
675
676			if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6,
677					    lladdr, np->dev->dev_addr,
678					    send_skb->len) < 0) {
679				kfree_skb(send_skb);
680				continue;
681			}
682
683			netpoll_send_skb(np, send_skb);
684
685			/* If there are several rx_hooks for the same address,
686			   we're fine by sending a single reply */
687			break;
688		}
689		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
690#endif
691	}
692}
693
694static bool pkt_is_ns(struct sk_buff *skb)
695{
696	struct nd_msg *msg;
697	struct ipv6hdr *hdr;
698
699	if (skb->protocol != htons(ETH_P_ARP))
700		return false;
701	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
702		return false;
703
704	msg = (struct nd_msg *)skb_transport_header(skb);
705	__skb_push(skb, skb->data - skb_transport_header(skb));
706	hdr = ipv6_hdr(skb);
707
708	if (hdr->nexthdr != IPPROTO_ICMPV6)
709		return false;
710	if (hdr->hop_limit != 255)
711		return false;
712	if (msg->icmph.icmp6_code != 0)
713		return false;
714	if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
715		return false;
716
717	return true;
718}
719
/*
 * Netpoll receive filter.
 *
 * While trapped, ARP packets and IPv6 Neighbour Solicitations are queued
 * on npinfo->neigh_tx.  Otherwise the packet is validated as IPv4 or IPv6
 * UDP and offered to the rx_hook of every netpoll instance on
 * npinfo->rx_np whose addresses and local port match.
 *
 * Returns 1 when the packet was consumed (queued, delivered to at least
 * one hook and freed, or dropped because we are trapped) and 0 when the
 * caller should keep processing it.
 *
 * NOTE(review): rx_np is walked here without taking npinfo->rx_lock;
 * confirm what protects the list on this path.
 */
int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
{
	int proto, len, ulen;
	int hits = 0;
	const struct iphdr *iph;
	struct udphdr *uh;
	struct netpoll *np, *tmp;

	if (list_empty(&npinfo->rx_np))
		goto out;

	if (skb->dev->type != ARPHRD_ETHER)
		goto out;

	/* check if netpoll clients need ARP */
	if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) {
		skb_queue_tail(&npinfo->neigh_tx, skb);
		return 1;
	} else if (pkt_is_ns(skb) && atomic_read(&trapped)) {
		skb_queue_tail(&npinfo->neigh_tx, skb);
		return 1;
	}

	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
		skb = vlan_untag(skb);
		if (unlikely(!skb))
			goto out;
	}

	proto = ntohs(eth_hdr(skb)->h_proto);
	if (proto != ETH_P_IP && proto != ETH_P_IPV6)
		goto out;
	if (skb->pkt_type == PACKET_OTHERHOST)
		goto out;
	if (skb_shared(skb))
		goto out;

	if (proto == ETH_P_IP) {
		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
			goto out;
		iph = (struct iphdr *)skb->data;
		if (iph->ihl < 5 || iph->version != 4)
			goto out;
		if (!pskb_may_pull(skb, iph->ihl*4))
			goto out;
		/* reload after the pull above */
		iph = (struct iphdr *)skb->data;
		if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
			goto out;

		len = ntohs(iph->tot_len);
		if (skb->len < len || len < iph->ihl*4)
			goto out;

		/*
		 * Our transport medium may have padded the buffer out.
		 * Now We trim to the true length of the frame.
		 */
		if (pskb_trim_rcsum(skb, len))
			goto out;

		/* reload after the trim above */
		iph = (struct iphdr *)skb->data;
		if (iph->protocol != IPPROTO_UDP)
			goto out;

		len -= iph->ihl*4;
		uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
		ulen = ntohs(uh->len);

		/* UDP length must account for exactly the rest of the packet */
		if (ulen != len)
			goto out;
		if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
			goto out;
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			/* a zero address or port in np acts as a wildcard */
			if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
				continue;
			if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
				continue;
			if (np->local_port && np->local_port != ntohs(uh->dest))
				continue;

			np->rx_hook(np, ntohs(uh->source),
				       (char *)(uh+1),
				       ulen - sizeof(struct udphdr));
			hits++;
		}
	} else {
#if IS_ENABLED(CONFIG_IPV6)
		const struct ipv6hdr *ip6h;

		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto out;
		ip6h = (struct ipv6hdr *)skb->data;
		if (ip6h->version != 6)
			goto out;
		len = ntohs(ip6h->payload_len);
		if (!len)
			goto out;
		if (len + sizeof(struct ipv6hdr) > skb->len)
			goto out;
		if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr)))
			goto out;
		ip6h = ipv6_hdr(skb);
		if (!pskb_may_pull(skb, sizeof(struct udphdr)))
			goto out;
		uh = udp_hdr(skb);
		ulen = ntohs(uh->len);
		if (ulen != skb->len)
			goto out;
		if (udp6_csum_init(skb, uh, IPPROTO_UDP))
			goto out;
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			/* unlike IPv4, both addresses must match exactly */
			if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr))
				continue;
			if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr))
				continue;
			if (np->local_port && np->local_port != ntohs(uh->dest))
				continue;

			np->rx_hook(np, ntohs(uh->source),
				       (char *)(uh+1),
				       ulen - sizeof(struct udphdr));
			hits++;
		}
#endif
	}

	if (!hits)
		goto out;

	kfree_skb(skb);
	return 1;

out:
	/* while trapped, packets nobody claimed are dropped here */
	if (atomic_read(&trapped)) {
		kfree_skb(skb);
		return 1;
	}

	return 0;
}
860
861void netpoll_print_options(struct netpoll *np)
862{
863	np_info(np, "local port %d\n", np->local_port);
864	if (np->ipv6)
865		np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
866	else
867		np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
868	np_info(np, "interface '%s'\n", np->dev_name);
869	np_info(np, "remote port %d\n", np->remote_port);
870	if (np->ipv6)
871		np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
872	else
873		np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
874	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
875}
876EXPORT_SYMBOL(netpoll_print_options);
877
878static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
879{
880	const char *end;
881
882	if (!strchr(str, ':') &&
883	    in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
884		if (!*end)
885			return 0;
886	}
887	if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
888#if IS_ENABLED(CONFIG_IPV6)
889		if (!*end)
890			return 1;
891#else
892		return -1;
893#endif
894	}
895	return -1;
896}
897
/*
 * Parse a netpoll configuration string into @np.
 *
 * The fields are consumed in this order, each one optional up to its
 * delimiter:
 *   [local-port]@[local-ip]/[dev-name],[remote-port]@<remote-ip>/[remote-mac]
 *
 * @opt is modified in place: each delimiter is overwritten with a NUL
 * while its field is parsed.  Local and remote addresses must belong to
 * the same family.  Returns 0 on success and -1 on a parse error.
 */
int netpoll_parse_options(struct netpoll *np, char *opt)
{
	char *cur=opt, *delim;
	int ipv6;

	/* local port, terminated by '@' */
	if (*cur != '@') {
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
		*delim = 0;
		if (kstrtou16(cur, 10, &np->local_port))
			goto parse_failed;
		cur = delim;
	}
	cur++;

	/* local address, terminated by '/' */
	if (*cur != '/') {
		if ((delim = strchr(cur, '/')) == NULL)
			goto parse_failed;
		*delim = 0;
		ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
		if (ipv6 < 0)
			goto parse_failed;
		else
			np->ipv6 = (bool)ipv6;
		cur = delim;
	}
	cur++;

	if (*cur != ',') {
		/* parse out dev name */
		if ((delim = strchr(cur, ',')) == NULL)
			goto parse_failed;
		*delim = 0;
		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
		cur = delim;
	}
	cur++;

	if (*cur != '@') {
		/* dst port */
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
		*delim = 0;
		if (*cur == ' ' || *cur == '\t')
			np_info(np, "warning: whitespace is not allowed\n");
		if (kstrtou16(cur, 10, &np->remote_port))
			goto parse_failed;
		cur = delim;
	}
	cur++;

	/* dst ip */
	if ((delim = strchr(cur, '/')) == NULL)
		goto parse_failed;
	*delim = 0;
	ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
	if (ipv6 < 0)
		goto parse_failed;
	/* remote family must agree with np->ipv6 as it stands at this point */
	else if (np->ipv6 != (bool)ipv6)
		goto parse_failed;
	else
		np->ipv6 = (bool)ipv6;
	cur = delim + 1;

	if (*cur != 0) {
		/* MAC address */
		if (!mac_pton(cur, np->remote_mac))
			goto parse_failed;
	}

	netpoll_print_options(np);

	return 0;

 parse_failed:
	/* cur points at the field that was being parsed when we gave up */
	np_info(np, "couldn't parse config at '%s'!\n", cur);
	return -1;
}
EXPORT_SYMBOL(netpoll_parse_options);
977
/*
 * Attach netpoll instance @np to @ndev.
 *
 * Allocates and initialises a netpoll_info for the device if it has none
 * (using @gfp), otherwise takes another reference on the existing one.
 * When @np has an rx_hook it is added to the device's rx_np list.
 *
 * Returns 0 on success, -ENOTSUPP when the device cannot be polled,
 * -ENOMEM on allocation failure, or the error returned by the driver's
 * ndo_netpoll_setup().
 */
int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
{
	struct netpoll_info *npinfo;
	const struct net_device_ops *ops;
	unsigned long flags;
	int err;

	np->dev = ndev;
	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);

	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
	    !ndev->netdev_ops->ndo_poll_controller) {
		np_err(np, "%s doesn't support polling, aborting\n",
		       np->dev_name);
		err = -ENOTSUPP;
		goto out;
	}

	if (!ndev->npinfo) {
		/* first netpoll user of this device */
		npinfo = kmalloc(sizeof(*npinfo), gfp);
		if (!npinfo) {
			err = -ENOMEM;
			goto out;
		}

		npinfo->rx_flags = 0;
		INIT_LIST_HEAD(&npinfo->rx_np);

		spin_lock_init(&npinfo->rx_lock);
		skb_queue_head_init(&npinfo->neigh_tx);
		skb_queue_head_init(&npinfo->txq);
		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

		atomic_set(&npinfo->refcnt, 1);

		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_setup) {
			err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
			if (err)
				goto free_npinfo;
		}
	} else {
		/* share the existing netpoll_info */
		npinfo = ndev->npinfo;
		atomic_inc(&npinfo->refcnt);
	}

	npinfo->netpoll = np;

	if (np->rx_hook) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
		list_add_tail(&np->rx, &npinfo->rx_np);
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}

	/* last thing to do is link it to the net device structure */
	rcu_assign_pointer(ndev->npinfo, npinfo);

	return 0;

free_npinfo:
	kfree(npinfo);
out:
	return err;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);
1044
/*
 * Resolve np->dev_name, bring the device up if necessary, pick a local
 * address when none was configured, and attach @np via __netpoll_setup().
 *
 * On success the device reference taken with dev_hold() is kept (it is
 * dropped in netpoll_cleanup()) and 0 is returned.  On failure the
 * reference is released and a negative errno is returned.
 */
int netpoll_setup(struct netpoll *np)
{
	struct net_device *ndev = NULL;
	struct in_device *in_dev;
	int err;

	rtnl_lock();
	/* NOTE(review): dev_name looks like an array member, in which case
	 * this test is always true - confirm against struct netpoll.
	 */
	if (np->dev_name)
		ndev = __dev_get_by_name(&init_net, np->dev_name);
	if (!ndev) {
		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
		err = -ENODEV;
		goto unlock;
	}
	dev_hold(ndev);

	if (netdev_master_upper_dev_get(ndev)) {
		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
		err = -EBUSY;
		goto put;
	}

	if (!netif_running(ndev)) {
		unsigned long atmost, atleast;

		np_info(np, "device %s not up yet, forcing it\n", np->dev_name);

		err = dev_open(ndev);

		if (err) {
			np_err(np, "failed to open %s\n", ndev->name);
			goto put;
		}

		/* rtnl is dropped while we sleep waiting for carrier */
		rtnl_unlock();
		atleast = jiffies + HZ/10;
		atmost = jiffies + carrier_timeout * HZ;
		while (!netif_carrier_ok(ndev)) {
			if (time_after(jiffies, atmost)) {
				np_notice(np, "timeout waiting for carrier\n");
				break;
			}
			msleep(1);
		}

		/* If carrier appears to come up instantly, we don't
		 * trust it and pause so that we don't pump all our
		 * queued console messages into the bitbucket.
		 */

		if (time_before(jiffies, atleast)) {
			np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
			msleep(4000);
		}
		rtnl_lock();
	}

	/* NOTE(review): for IPv6 this only tests the bytes aliased by
	 * local_ip.ip - confirm that is the intended "unset" check.
	 */
	if (!np->local_ip.ip) {
		if (!np->ipv6) {
			in_dev = __in_dev_get_rtnl(ndev);

			if (!in_dev || !in_dev->ifa_list) {
				np_err(np, "no IP address for %s, aborting\n",
				       np->dev_name);
				err = -EDESTADDRREQ;
				goto put;
			}

			/* use the first address configured on the device */
			np->local_ip.ip = in_dev->ifa_list->ifa_local;
			np_info(np, "local IP %pI4\n", &np->local_ip.ip);
		} else {
#if IS_ENABLED(CONFIG_IPV6)
			struct inet6_dev *idev;

			err = -EDESTADDRREQ;
			idev = __in6_dev_get(ndev);
			if (idev) {
				struct inet6_ifaddr *ifp;

				read_lock_bh(&idev->lock);
				/* take the first address that is not link-local */
				list_for_each_entry(ifp, &idev->addr_list, if_list) {
					if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
						continue;
					np->local_ip.in6 = ifp->addr;
					err = 0;
					break;
				}
				read_unlock_bh(&idev->lock);
			}
			if (err) {
				np_err(np, "no IPv6 address for %s, aborting\n",
				       np->dev_name);
				goto put;
			} else
				np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
#else
			np_err(np, "IPv6 is not supported %s, aborting\n",
			       np->dev_name);
			err = -EINVAL;
			goto put;
#endif
		}
	}

	/* fill up the skb queue */
	refill_skbs();

	err = __netpoll_setup(np, ndev, GFP_KERNEL);
	if (err)
		goto put;

	rtnl_unlock();
	return 0;

put:
	dev_put(ndev);
unlock:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(netpoll_setup);
1166
/* Initialise the emergency skb pool list head at core_initcall time. */
static int __init netpoll_init(void)
{
	skb_queue_head_init(&skb_pool);
	return 0;
}
core_initcall(netpoll_init);
1173
/*
 * RCU callback that releases a netpoll_info: purge both skb queues,
 * cancel the pending tx work and free the structure.
 */
static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
{
	struct netpoll_info *npinfo =
			container_of(rcu_head, struct netpoll_info, rcu);

	skb_queue_purge(&npinfo->neigh_tx);
	skb_queue_purge(&npinfo->txq);

	/* we can't call cancel_delayed_work_sync here, as we are in softirq */
	cancel_delayed_work(&npinfo->tx_work);

	/* clean after last, unfinished work */
	__skb_queue_purge(&npinfo->txq);
	/* now cancel it again */
	cancel_delayed_work(&npinfo->tx_work);
	kfree(npinfo);
}
1191
1192void __netpoll_cleanup(struct netpoll *np)
1193{
1194	struct netpoll_info *npinfo;
1195	unsigned long flags;
1196
1197	npinfo = np->dev->npinfo;
1198	if (!npinfo)
1199		return;
1200
1201	if (!list_empty(&npinfo->rx_np)) {
1202		spin_lock_irqsave(&npinfo->rx_lock, flags);
1203		list_del(&np->rx);
1204		if (list_empty(&npinfo->rx_np))
1205			npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
1206		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
1207	}
1208
1209	if (atomic_dec_and_test(&npinfo->refcnt)) {
1210		const struct net_device_ops *ops;
1211
1212		ops = np->dev->netdev_ops;
1213		if (ops->ndo_netpoll_cleanup)
1214			ops->ndo_netpoll_cleanup(np->dev);
1215
1216		RCU_INIT_POINTER(np->dev->npinfo, NULL);
1217		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
1218	}
1219}
1220EXPORT_SYMBOL_GPL(__netpoll_cleanup);
1221
/* RCU callback: detach the netpoll instance from its device and free it. */
static void rcu_cleanup_netpoll(struct rcu_head *rcu_head)
{
	struct netpoll *np = container_of(rcu_head, struct netpoll, rcu);

	__netpoll_cleanup(np);
	kfree(np);
}
1229
/*
 * Schedule @np to be cleaned up and freed by rcu_cleanup_netpoll() after
 * an RCU-bh grace period.
 */
void __netpoll_free_rcu(struct netpoll *np)
{
	call_rcu_bh(&np->rcu, rcu_cleanup_netpoll);
}
EXPORT_SYMBOL_GPL(__netpoll_free_rcu);
1235
/*
 * Tear down @np: detach it under the rtnl lock, drop the device reference
 * and clear np->dev.  Does nothing when @np has no device.
 */
void netpoll_cleanup(struct netpoll *np)
{
	if (!np->dev)
		return;

	rtnl_lock();
	__netpoll_cleanup(np);
	rtnl_unlock();

	dev_put(np->dev);
	np->dev = NULL;
}
EXPORT_SYMBOL(netpoll_cleanup);
1249
/* Return the current value of the trap counter (non-zero while trapped). */
int netpoll_trap(void)
{
	return atomic_read(&trapped);
}
EXPORT_SYMBOL(netpoll_trap);
1255
1256void netpoll_set_trap(int trap)
1257{
1258	if (trap)
1259		atomic_inc(&trapped);
1260	else
1261		atomic_dec(&trapped);
1262}
1263EXPORT_SYMBOL(netpoll_set_trap);
1264