/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>

#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send something to the network.
     It will result in deadlocks if the backend/driver wants to use the
     neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever should be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */
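
/* A minimal sketch of the rule above (take a reference, then drop the
 * table lock before doing anything non-trivial); the placeholders in
 * angle brackets are illustrative, not real helpers:
 *
 *	write_lock_bh(&tbl->lock);
 *	n = <find entry in a hash bucket>;
 *	if (n)
 *		neigh_hold(n);
 *	write_unlock_bh(&tbl->lock);
 *	if (n) {
 *		<send packets, call into the protocol backend, ...>;
 *		neigh_release(n);
 *	}
 */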

static DEFINE_RWLOCK(neigh_tbl_lock);

static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}

/*
 * It is a random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (net_random() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
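
/* Worked example (a sketch, assuming the common 30 second
 * base_reachable_time default): with base = 30 * HZ,
 * net_random() % base is uniform over [0, 30s) and adding base >> 1
 * shifts it to [15s, 45s), i.e. (1/2)*base ... (3/2)*base as the
 * comment above states.
 */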


static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk	= 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy the neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to a safe state.
				 */
				skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}

static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));
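	/* Force the random value odd (an assumption about intent: odd
	 * multipliers are invertible mod 2^32, so hash functions that
	 * multiply by hash_rnd do not lose input bits).
	 */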
	*x |= 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
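
/* Note on the resize above: readers walk the table under
 * rcu_read_lock_bh() and may still observe the old nht while a grow is
 * in flight; the old bucket array is only freed via call_rcu() once all
 * such readers are done.
 */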

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
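
/* Example (sketch): resolving an IPv4 next hop against the ARP table,
 * where arp_tbl is the IPv4 neigh_table instance and next_hop is assumed
 * to be a __be32.  A returned entry carries a reference that the caller
 * must drop:
 *
 *	struct neighbour *n = neigh_lookup(&arp_tbl, &next_hop, dev);
 *	if (n) {
 *		<read n->ha under n->ha_lock>;
 *		neigh_release(n);
 *	}
 */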

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
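
/* Most callers use the neigh_create() wrapper from <net/neighbour.h>,
 * which is simply __neigh_create(tbl, pkey, dev, true), i.e. it always
 * returns a referenced entry.
 */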

static u32 pneigh_hash(const void *pkey, int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);


int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 *	The neighbour must already be out of the table.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}

static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	/*
	 *	periodically recompute ReachableTime from the random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
	}

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release the lock here, even if the hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
	 * base_reachable_time.
	 */
	schedule_delayed_work(&tbl->gc_work,
			      tbl->parms.base_reachable_time >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return (n->nud_state & NUD_PROBE) ?
		p->ucast_probes :
		p->ucast_probes + p->app_probes + p->mcast_probes;
}
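
/* Worked example (assuming the usual ARP defaults ucast_probes = 3,
 * mcast_probes = 3, app_probes = 0): an entry in NUD_PROBE gives up
 * after 3 unicast probes, while NUD_INCOMPLETE resolution may send
 * 3 + 0 + 3 = 6 probes before the entry goes to NUD_FAILED.
 */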

static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a very delicate place. error_report() is a very
	   complicated routine. In particular, it can hit the same
	   neighbour entry!

	   So we try to be careful and avoid an endless loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */

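/* A summary of the transitions driven below:
 *
 *	NUD_REACHABLE: confirmed recently -> stay REACHABLE;
 *	               used recently      -> NUD_DELAY;
 *	               otherwise          -> NUD_STALE
 *	NUD_DELAY:     confirmed in time  -> NUD_REACHABLE;
 *	               timed out          -> NUD_PROBE
 *	NUD_PROBE / NUD_INCOMPLETE: after neigh_max_probes() attempts
 *	               -> NUD_FAILED
 */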
static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       neigh->parms->queue_len_bytes) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
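
/* Callers normally go through the neigh_event_send() inline from
 * <net/neighbour.h>, which returns 0 immediately for entries in
 * NUD_CONNECTED, NUD_DELAY or NUD_PROBE and only falls back to
 * __neigh_event_send() on the slow path.  A non-zero return here means
 * the skb was queued (or dropped) pending resolution.
 */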

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (hh->hh_len) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}



/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
				It also allows retaining the current state
				if the lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding the existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   The caller MUST hold a reference count on the entry.
 */

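/* Example (sketch): an administrative replace, as issued from neigh_add()
 * below for RTM_NEWNEIGH requests (the state, e.g. NUD_PERMANENT, comes
 * from the netlink message):
 *
 *	neigh_update(neigh, lladdr, NUD_PERMANENT,
 *		     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN);
 */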
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare the new lladdr with the cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check the override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If the entry was valid and the address is unchanged,
	   do not change the entry state if the new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid an endless loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* Takes write_lock_bh(&n->lock) itself to initialize the hh_cache entry. */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	__be16 prot = dst->ops->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* This function can be used in contexts where only the old dev_queue_xmit
 * worked, e.g. if you want to override the normal output path (eql, shaper),
 * but resolution is not yet done.
 */

int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	__skb_pull(skb, skb_network_offset(skb));

	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
			    skb->len) < 0 &&
	    dev->header_ops->rebuild(skb))
		return 0;

	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_compat_output);

/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
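
/* Which hook a neighbour uses (see neigh_connect()/neigh_suspect()
 * above): ops->connected_output once the entry is NUD_CONNECTED and the
 * slower resolving path otherwise; neigh_direct_output is typically
 * installed by protocols for devices that need no resolution at all.
 */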

static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	unsigned long sched_next = now + (net_random() % p->proxy_delay);

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						      struct net *net, int ifindex)
{
	struct neigh_parms *p;

	for (p = &tbl->parms; p; p = p->next) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p, *ref;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	ref = lookup_neigh_parms(tbl, net, 0);
	if (!ref)
		return NULL;

	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			kfree(p);
			return NULL;
		}

		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;
		write_lock_bh(&tbl->lock);
		p->next		= tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	neigh_dbg(1, "%s: not found\n", __func__);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
}

void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
	write_lock(&neigh_tbl_lock);
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);

	if (unlikely(tmp)) {
		pr_err("Registering multiple tables for family %d\n",
		       tbl->family);
		dump_stack();
	}
}
EXPORT_SYMBOL(neigh_table_init);
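
/* Example (sketch): protocols register one table per family at init
 * time, e.g. IPv4 ARP does neigh_table_init(&arp_tbl) from arp_init().
 * The loop above only warns if a second table is registered for the
 * same family; the table is linked into neigh_tables either way.
 */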
1573
1574int neigh_table_clear(struct neigh_table *tbl)
1575{
1576	struct neigh_table **tp;
1577
1578	/* It is not clean... Fix it to unload IPv6 module safely */
1579	cancel_delayed_work_sync(&tbl->gc_work);
1580	del_timer_sync(&tbl->proxy_timer);
1581	pneigh_queue_purge(&tbl->proxy_queue);
1582	neigh_ifdown(tbl, NULL);
1583	if (atomic_read(&tbl->entries))
1584		pr_crit("neighbour leakage\n");
1585	write_lock(&neigh_tbl_lock);
1586	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1587		if (*tp == tbl) {
1588			*tp = tbl->next;
1589			break;
1590		}
1591	}
1592	write_unlock(&neigh_tbl_lock);
1593
1594	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1595		 neigh_hash_free_rcu);
1596	tbl->nht = NULL;
1597
1598	kfree(tbl->phash_buckets);
1599	tbl->phash_buckets = NULL;
1600
1601	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1602
1603	free_percpu(tbl->stats);
1604	tbl->stats = NULL;
1605
1606	return 0;
1607}
1608EXPORT_SYMBOL(neigh_table_clear);
1609
1610static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
1611{
1612	struct net *net = sock_net(skb->sk);
1613	struct ndmsg *ndm;
1614	struct nlattr *dst_attr;
1615	struct neigh_table *tbl;
1616	struct net_device *dev = NULL;
1617	int err = -EINVAL;
1618
1619	ASSERT_RTNL();
1620	if (nlmsg_len(nlh) < sizeof(*ndm))
1621		goto out;
1622
1623	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1624	if (dst_attr == NULL)
1625		goto out;
1626
1627	ndm = nlmsg_data(nlh);
1628	if (ndm->ndm_ifindex) {
1629		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1630		if (dev == NULL) {
1631			err = -ENODEV;
1632			goto out;
1633		}
1634	}
1635
1636	read_lock(&neigh_tbl_lock);
1637	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1638		struct neighbour *neigh;
1639
1640		if (tbl->family != ndm->ndm_family)
1641			continue;
1642		read_unlock(&neigh_tbl_lock);
1643
1644		if (nla_len(dst_attr) < tbl->key_len)
1645			goto out;
1646
1647		if (ndm->ndm_flags & NTF_PROXY) {
1648			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1649			goto out;
1650		}
1651
1652		if (dev == NULL)
1653			goto out;
1654
1655		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1656		if (neigh == NULL) {
1657			err = -ENOENT;
1658			goto out;
1659		}
1660
1661		err = neigh_update(neigh, NULL, NUD_FAILED,
1662				   NEIGH_UPDATE_F_OVERRIDE |
1663				   NEIGH_UPDATE_F_ADMIN);
1664		neigh_release(neigh);
1665		goto out;
1666	}
1667	read_unlock(&neigh_tbl_lock);
1668	err = -EAFNOSUPPORT;
1669
1670out:
1671	return err;
1672}
1673
1674static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
1675{
1676	struct net *net = sock_net(skb->sk);
1677	struct ndmsg *ndm;
1678	struct nlattr *tb[NDA_MAX+1];
1679	struct neigh_table *tbl;
1680	struct net_device *dev = NULL;
1681	int err;
1682
1683	ASSERT_RTNL();
1684	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1685	if (err < 0)
1686		goto out;
1687
1688	err = -EINVAL;
1689	if (tb[NDA_DST] == NULL)
1690		goto out;
1691
1692	ndm = nlmsg_data(nlh);
1693	if (ndm->ndm_ifindex) {
1694		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1695		if (dev == NULL) {
1696			err = -ENODEV;
1697			goto out;
1698		}
1699
1700		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1701			goto out;
1702	}
1703
1704	read_lock(&neigh_tbl_lock);
1705	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1706		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1707		struct neighbour *neigh;
1708		void *dst, *lladdr;
1709
1710		if (tbl->family != ndm->ndm_family)
1711			continue;
1712		read_unlock(&neigh_tbl_lock);
1713
1714		if (nla_len(tb[NDA_DST]) < tbl->key_len)
1715			goto out;
1716		dst = nla_data(tb[NDA_DST]);
1717		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1718
1719		if (ndm->ndm_flags & NTF_PROXY) {
1720			struct pneigh_entry *pn;
1721
1722			err = -ENOBUFS;
1723			pn = pneigh_lookup(tbl, net, dst, dev, 1);
1724			if (pn) {
1725				pn->flags = ndm->ndm_flags;
1726				err = 0;
1727			}
1728			goto out;
1729		}
1730
1731		if (dev == NULL)
1732			goto out;
1733
1734		neigh = neigh_lookup(tbl, dst, dev);
1735		if (neigh == NULL) {
1736			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1737				err = -ENOENT;
1738				goto out;
1739			}
1740
1741			neigh = __neigh_lookup_errno(tbl, dst, dev);
1742			if (IS_ERR(neigh)) {
1743				err = PTR_ERR(neigh);
1744				goto out;
1745			}
1746		} else {
1747			if (nlh->nlmsg_flags & NLM_F_EXCL) {
1748				err = -EEXIST;
1749				neigh_release(neigh);
1750				goto out;
1751			}
1752
1753			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1754				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1755		}
1756
1757		if (ndm->ndm_flags & NTF_USE) {
1758			neigh_event_send(neigh, NULL);
1759			err = 0;
1760		} else
1761			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1762		neigh_release(neigh);
1763		goto out;
1764	}
1765
1766	read_unlock(&neigh_tbl_lock);
1767	err = -EAFNOSUPPORT;
1768out:
1769	return err;
1770}
1771
1772static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1773{
1774	struct nlattr *nest;
1775
1776	nest = nla_nest_start(skb, NDTA_PARMS);
1777	if (nest == NULL)
1778		return -ENOBUFS;
1779
1780	if ((parms->dev &&
1781	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1782	    nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1783	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1784	    /* approximative value for deprecated QUEUE_LEN (in packets) */
1785	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
1786			parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1787	    nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1788	    nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1789	    nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1790	    nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1791	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1792	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1793			  parms->base_reachable_time) ||
1794	    nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1795	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1796			  parms->delay_probe_time) ||
1797	    nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1798	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1799	    nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1800	    nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1801		goto nla_put_failure;
1802	return nla_nest_end(skb, nest);
1803
1804nla_put_failure:
1805	nla_nest_cancel(skb, nest);
1806	return -EMSGSIZE;
1807}
1808
1809static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1810			      u32 pid, u32 seq, int type, int flags)
1811{
1812	struct nlmsghdr *nlh;
1813	struct ndtmsg *ndtmsg;
1814
1815	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1816	if (nlh == NULL)
1817		return -EMSGSIZE;
1818
1819	ndtmsg = nlmsg_data(nlh);
1820
1821	read_lock_bh(&tbl->lock);
1822	ndtmsg->ndtm_family = tbl->family;
1823	ndtmsg->ndtm_pad1   = 0;
1824	ndtmsg->ndtm_pad2   = 0;
1825
1826	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1827	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1828	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1829	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1830	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1831		goto nla_put_failure;
1832	{
1833		unsigned long now = jiffies;
1834		unsigned int flush_delta = now - tbl->last_flush;
1835		unsigned int rand_delta = now - tbl->last_rand;
1836		struct neigh_hash_table *nht;
1837		struct ndt_config ndc = {
1838			.ndtc_key_len		= tbl->key_len,
1839			.ndtc_entry_size	= tbl->entry_size,
1840			.ndtc_entries		= atomic_read(&tbl->entries),
1841			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
1842			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
1843			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
1844		};
1845
1846		rcu_read_lock_bh();
1847		nht = rcu_dereference_bh(tbl->nht);
1848		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1849		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1850		rcu_read_unlock_bh();
1851
1852		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1853			goto nla_put_failure;
1854	}
1855
1856	{
1857		int cpu;
1858		struct ndt_stats ndst;
1859
1860		memset(&ndst, 0, sizeof(ndst));
1861
1862		for_each_possible_cpu(cpu) {
1863			struct neigh_statistics	*st;
1864
1865			st = per_cpu_ptr(tbl->stats, cpu);
1866			ndst.ndts_allocs		+= st->allocs;
1867			ndst.ndts_destroys		+= st->destroys;
1868			ndst.ndts_hash_grows		+= st->hash_grows;
1869			ndst.ndts_res_failed		+= st->res_failed;
1870			ndst.ndts_lookups		+= st->lookups;
1871			ndst.ndts_hits			+= st->hits;
1872			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
1873			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
1874			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
1875			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
1876		}
1877
1878		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1879			goto nla_put_failure;
1880	}
1881
1882	BUG_ON(tbl->parms.dev);
1883	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1884		goto nla_put_failure;
1885
1886	read_unlock_bh(&tbl->lock);
1887	return nlmsg_end(skb, nlh);
1888
1889nla_put_failure:
1890	read_unlock_bh(&tbl->lock);
1891	nlmsg_cancel(skb, nlh);
1892	return -EMSGSIZE;
1893}
1894
1895static int neightbl_fill_param_info(struct sk_buff *skb,
1896				    struct neigh_table *tbl,
1897				    struct neigh_parms *parms,
1898				    u32 pid, u32 seq, int type,
1899				    unsigned int flags)
1900{
1901	struct ndtmsg *ndtmsg;
1902	struct nlmsghdr *nlh;
1903
1904	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1905	if (nlh == NULL)
1906		return -EMSGSIZE;
1907
1908	ndtmsg = nlmsg_data(nlh);
1909
1910	read_lock_bh(&tbl->lock);
1911	ndtmsg->ndtm_family = tbl->family;
1912	ndtmsg->ndtm_pad1   = 0;
1913	ndtmsg->ndtm_pad2   = 0;
1914
1915	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1916	    neightbl_fill_parms(skb, parms) < 0)
1917		goto errout;
1918
1919	read_unlock_bh(&tbl->lock);
1920	return nlmsg_end(skb, nlh);
1921errout:
1922	read_unlock_bh(&tbl->lock);
1923	nlmsg_cancel(skb, nlh);
1924	return -EMSGSIZE;
1925}
1926
1927static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1928	[NDTA_NAME]		= { .type = NLA_STRING },
1929	[NDTA_THRESH1]		= { .type = NLA_U32 },
1930	[NDTA_THRESH2]		= { .type = NLA_U32 },
1931	[NDTA_THRESH3]		= { .type = NLA_U32 },
1932	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
1933	[NDTA_PARMS]		= { .type = NLA_NESTED },
1934};
1935
1936static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1937	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
1938	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
1939	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
1940	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
1941	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
1942	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
1943	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
1944	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
1945	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
1946	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
1947	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
1948	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
1949	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
1950};
1951
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				p->queue_len_bytes = nla_get_u32(tbp[i]) *
						     SKB_TRUESIZE(ETH_FRAME_LEN);
				break;
			case NDTPA_QUEUE_LENBYTES:
				p->queue_len_bytes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}

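/*
 * Dump every table matching the requested family, each followed by its
 * per-device parameter sets.  cb->args[0] holds the table index and
 * cb->args[1] the parms index to resume from when the dump continues
 * in a later recvmsg().
 */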
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}

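/*
 * Fill one neighbour entry into an RTM_NEWNEIGH/RTM_DELNEIGH message.
 * neigh->lock is held while nud_state, the link-layer address snapshot
 * and the cacheinfo timestamps are read, so userspace sees a
 * consistent view of the entry.
 */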
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

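/*
 * Proxy entries have no NUD state machine: they are reported as
 * NUD_NONE with NTF_PROXY set and carry only an NDA_DST attribute.
 */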
static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = tbl->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
	ndm->ndm_type	 = NDA_DST;
	ndm->ndm_ifindex = pn->dev->ifindex;
	ndm->ndm_state	 = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}

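/*
 * Walk one table's hash under rcu_read_lock_bh(), emitting the entries
 * that belong to the requesting namespace.  cb->args[1]/[2] record the
 * bucket and in-bucket index reached so a partial dump can resume.
 */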
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}

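/*
 * Same resume scheme for the proxy hash, but under read_lock_bh on
 * tbl->lock since pneigh entries are not traversed via RCU; the
 * cursors live in cb->args[3]/[4] so they do not clash with
 * neigh_dump_table()'s.
 */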
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (dev_net(n->dev) != net)
				continue;
			if (idx < s_idx)
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI, tbl) <= 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;
}

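/*
 * Entry point for RTM_GETNEIGH dumps.  A request carrying a full ndmsg
 * with ndm_flags == NTF_PROXY selects the proxy tables instead of the
 * neighbour cache proper; cb->args[0] tracks the current table.
 */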
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	read_lock(&neigh_tbl_lock);
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* Check that a full ndmsg structure is present; the family
	 * member is at the same offset in both rtgenmsg and ndmsg.
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl;
	     tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}

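/*
 * Invoke @cb on every entry of @tbl with the table read-locked, so the
 * hash cannot be resized underneath the walk.  A minimal, hypothetical
 * caller counting the entries on one device might look like:
 *
 *	static void count_on_dev(struct neighbour *n, void *cookie)
 *	{
 *		struct dev_count *dc = cookie;	(hypothetical cookie type)
 *
 *		if (n->dev == dc->dev)
 *			dc->count++;
 *	}
 *
 *	neigh_for_each(&arp_tbl, count_on_dev, &dc);
 */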
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);

/* The tbl->lock must be held as a writer and BH disabled. */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);

#ifdef CONFIG_PROC_FS

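/*
 * /proc iteration helpers.  A traversal visits the neighbour hash
 * first and then, unless NEIGH_SEQ_NEIGH_ONLY is set, the proxy hash;
 * NEIGH_SEQ_IS_PNEIGH in state->flags records which phase we are in.
 */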
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}

static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}

static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (n) {
		--(*pos);
		while (*pos) {
			n = neigh_get_next(seq, n, pos);
			if (!n)
				break;
		}
	}
	return *pos ? NULL : n;
}

static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;
	struct pneigh_entry *pn = NULL;
	int bucket = state->bucket;

	state->flags |= NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
		pn = tbl->phash_buckets[bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}
	state->bucket = bucket;

	return pn;
}

static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	do {
		pn = pn->next;
	} while (pn && !net_eq(pneigh_net(pn), net));

	while (!pn) {
		if (++state->bucket > PNEIGH_HASHMASK)
			break;
		pn = tbl->phash_buckets[state->bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}

static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (pn) {
		--(*pos);
		while (*pos) {
			pn = pneigh_get_next(seq, pn, pos);
			if (!pn)
				break;
		}
	}
	return *pos ? NULL : pn;
}

static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;
	loff_t idxpos = *pos;

	rc = neigh_get_idx(seq, &idxpos);
	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
		rc = pneigh_get_idx(seq, &idxpos);

	return rc;
}

void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);

void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);

void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);

/* statistics via seq_file */

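/*
 * One line of output per possible CPU: *pos == 0 produces the header
 * (SEQ_START_TOKEN) and *pos == n maps to the per-CPU stats of CPU
 * n-1, skipping CPU ids that are not possible.
 */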
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}

static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = seq->private;
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards
		   );

	return 0;
}

static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};

static int neigh_stat_seq_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &neigh_stat_seq_ops);

	if (!ret) {
		struct seq_file *sf = file->private_data;
		sf->private = PDE_DATA(inode);
	}
	return ret;
}

static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open 	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

#endif /* CONFIG_PROC_FS */

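/*
 * Worst-case payload of a neighbour notification: NDA_DST and
 * NDA_LLADDR are both sized for MAX_ADDR_LEN, so the skb allocated in
 * __neigh_notify() is always large enough for neigh_fill_info().
 */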
static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4); /* NDA_PROBES */
}

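/*
 * Broadcast an event for this entry to RTNLGRP_NEIGH listeners.  May
 * be called in atomic context, hence GFP_ATOMIC; -EMSGSIZE here would
 * mean neigh_nlmsg_size() above underestimated the message.
 */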
static void __neigh_notify(struct neighbour *n, int type, int flags)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, 0, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}

#ifdef CONFIG_ARPD
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
#endif /* CONFIG_ARPD */

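/*
 * sysctl plumbing.  The legacy "unres_qlen" knob is expressed in
 * packets but backed by queue_len_bytes: a write is scaled up by
 * SKB_TRUESIZE(ETH_FRAME_LEN) (e.g. writing 3 stores
 * 3 * SKB_TRUESIZE(ETH_FRAME_LEN) bytes) and a read performs the
 * inverse division, so values set via "unres_qlen_bytes" may read
 * back rounded down here.
 */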
#ifdef CONFIG_SYSCTL
static int zero;
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);

static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
			   size_t *lenp, loff_t *ppos)
{
	int size, ret;
	ctl_table tmp = *ctl;

	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}

enum {
	NEIGH_VAR_MCAST_PROBE,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,
	NEIGH_VAR_MAX
};

static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		[NEIGH_VAR_MCAST_PROBE] = {
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_UCAST_PROBE] = {
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_APP_PROBE] = {
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_RETRANS_TIME] = {
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_DELAY_PROBE_TIME] = {
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_STALETIME] = {
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_QUEUE_LEN] = {
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_unres_qlen,
		},
		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
			.procname	= "unres_qlen_bytes",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.proc_handler   = proc_dointvec_minmax,
		},
		[NEIGH_VAR_PROXY_QLEN] = {
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_ANYCAST_DELAY] = {
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_PROXY_DELAY] = {
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_LOCKTIME] = {
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_RETRANS_TIME_MS] = {
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{},
	},
};

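/*
 * Clone the template above, point each entry's ->data at this parms
 * instance and register it under "net/<p_name>/neigh/<dev|default>";
 * e.g. p_name "ipv4" and device "eth0" would yield
 * /proc/sys/net/ipv4/neigh/eth0/retrans_time.  The gc_* entries exist
 * only in the "default" table: with a device they are zeroed, which
 * terminates the ctl_table early, and without one their ->data
 * pointers rely on gc_interval/gc_thresh1..3 immediately following
 * the parms member inside struct neigh_table.
 */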
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  char *p_name, proc_handler *handler)
{
	struct neigh_sysctl_table *t;
	const char *dev_name_source = NULL;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
		/* RetransTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);

void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;
		p->sysctl_table = NULL;
		unregister_net_sysctl_table(t->sysctl_header);
		kfree(t);
	}
}
EXPORT_SYMBOL(neigh_sysctl_unregister);

#endif	/* CONFIG_SYSCTL */

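/*
 * Wire the RTM_*NEIGH and RTM_*NEIGHTBL message types to the handlers
 * above (neigh_add and neigh_delete are defined earlier in this file).
 */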
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);