1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 *                         Patrick Schaaf <bof@bof.de>
3 * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10/* Kernel module for IP set management */
11
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/moduleparam.h>
15#include <linux/ip.h>
16#include <linux/skbuff.h>
17#include <linux/spinlock.h>
18#include <linux/rculist.h>
19#include <net/netlink.h>
20#include <net/net_namespace.h>
21#include <net/netns/generic.h>
22
23#include <linux/netfilter.h>
24#include <linux/netfilter/x_tables.h>
25#include <linux/netfilter/nfnetlink.h>
26#include <linux/netfilter/ipset/ip_set.h>
27
/* File-scope state; unlike struct ip_set_net below it is not kept per
 * network namespace.
 */
static LIST_HEAD(ip_set_type_list);		/* all registered set types */
static DEFINE_MUTEX(ip_set_type_mutex);		/* protects ip_set_type_list */
static DEFINE_RWLOCK(ip_set_ref_lock);		/* protects the set refs */
31
/* Per network namespace state, looked up through net_generic() with
 * ip_set_net_id as the key (see ip_set_pernet()).
 */
struct ip_set_net {
	struct ip_set * __rcu *ip_set_list;	/* all individual sets */
	ip_set_id_t	ip_set_max;	/* max number of sets */
	int		is_deleted;	/* deleted by ip_set_net_exit */
};
/* Key passed to net_generic() to find the struct ip_set_net of a netns */
static int ip_set_net_id __read_mostly;
38
39static inline struct ip_set_net *ip_set_pernet(struct net *net)
40{
41	return net_generic(net, ip_set_net_id);
42}
43
/* Number of slots the set array grows by when it is full (ip_set_create) */
#define IP_SET_INC	64
/* True when the two names are equal within the first IPSET_MAXNAMELEN bytes */
#define STREQ(a, b)	(strncmp(a, b, IPSET_MAXNAMELEN) == 0)
46
/* Module parameter "max_sets"; it is not read anywhere in the part of the
 * file visible here.
 * NOTE(review): the variable is unsigned int but is registered below with
 * parameter type "int" - confirm this is intended.
 */
static unsigned int max_sets;

module_param(max_sets, int, 0600);
MODULE_PARM_DESC(max_sets, "maximal number of sets");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
MODULE_DESCRIPTION("core IP set support");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
55
/* When the nfnl mutex is held: */
/* The condition handed to rcu_dereference_protected() is the constant 1,
 * so nothing is verified at this point; callers are expected to hold the
 * nfnl mutex themselves.
 */
#define ip_set_dereference(p)		\
	rcu_dereference_protected(p, 1)
/* Slot @id of the set array of @inst. The expansion is an array element,
 * so it is also used as an assignment target in this file.
 */
#define ip_set(inst, id)		\
	ip_set_dereference((inst)->ip_set_list)[id]
61
62/*
63 * The set types are implemented in modules and registered set types
64 * can be found in ip_set_type_list. Adding/deleting types is
65 * serialized by ip_set_type_mutex.
66 */
67
68static inline void
69ip_set_type_lock(void)
70{
71	mutex_lock(&ip_set_type_mutex);
72}
73
74static inline void
75ip_set_type_unlock(void)
76{
77	mutex_unlock(&ip_set_type_mutex);
78}
79
80/* Register and deregister settype */
81
82static struct ip_set_type *
83find_set_type(const char *name, u8 family, u8 revision)
84{
85	struct ip_set_type *type;
86
87	list_for_each_entry_rcu(type, &ip_set_type_list, list)
88		if (STREQ(type->name, name) &&
89		    (type->family == family ||
90		     type->family == NFPROTO_UNSPEC) &&
91		    revision >= type->revision_min &&
92		    revision <= type->revision_max)
93			return type;
94	return NULL;
95}
96
97/* Unlock, try to load a set type module and lock again */
98static bool
99load_settype(const char *name)
100{
101	nfnl_unlock(NFNL_SUBSYS_IPSET);
102	pr_debug("try to load ip_set_%s\n", name);
103	if (request_module("ip_set_%s", name) < 0) {
104		pr_warn("Can't find ip_set type %s\n", name);
105		nfnl_lock(NFNL_SUBSYS_IPSET);
106		return false;
107	}
108	nfnl_lock(NFNL_SUBSYS_IPSET);
109	return true;
110}
111
112/* Find a set type and reference it */
113#define find_set_type_get(name, family, revision, found)	\
114	__find_set_type_get(name, family, revision, found, false)
115
116static int
117__find_set_type_get(const char *name, u8 family, u8 revision,
118		    struct ip_set_type **found, bool retry)
119{
120	struct ip_set_type *type;
121	int err;
122
123	if (retry && !load_settype(name))
124		return -IPSET_ERR_FIND_TYPE;
125
126	rcu_read_lock();
127	*found = find_set_type(name, family, revision);
128	if (*found) {
129		err = !try_module_get((*found)->me) ? -EFAULT : 0;
130		goto unlock;
131	}
132	/* Make sure the type is already loaded
133	 * but we don't support the revision */
134	list_for_each_entry_rcu(type, &ip_set_type_list, list)
135		if (STREQ(type->name, name)) {
136			err = -IPSET_ERR_FIND_TYPE;
137			goto unlock;
138		}
139	rcu_read_unlock();
140
141	return retry ? -IPSET_ERR_FIND_TYPE :
142		__find_set_type_get(name, family, revision, found, true);
143
144unlock:
145	rcu_read_unlock();
146	return err;
147}
148
149/* Find a given set type by name and family.
150 * If we succeeded, the supported minimal and maximum revisions are
151 * filled out.
152 */
153#define find_set_type_minmax(name, family, min, max) \
154	__find_set_type_minmax(name, family, min, max, false)
155
156static int
157__find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max,
158		       bool retry)
159{
160	struct ip_set_type *type;
161	bool found = false;
162
163	if (retry && !load_settype(name))
164		return -IPSET_ERR_FIND_TYPE;
165
166	*min = 255; *max = 0;
167	rcu_read_lock();
168	list_for_each_entry_rcu(type, &ip_set_type_list, list)
169		if (STREQ(type->name, name) &&
170		    (type->family == family ||
171		     type->family == NFPROTO_UNSPEC)) {
172			found = true;
173			if (type->revision_min < *min)
174				*min = type->revision_min;
175			if (type->revision_max > *max)
176				*max = type->revision_max;
177		}
178	rcu_read_unlock();
179	if (found)
180		return 0;
181
182	return retry ? -IPSET_ERR_FIND_TYPE :
183		__find_set_type_minmax(name, family, min, max, true);
184}
185
/* Printable name of a protocol family for log messages: "inet" for
 * NFPROTO_IPV4, "inet6" for NFPROTO_IPV6 and "any" for everything else.
 */
#define family_name(f)	((f) == NFPROTO_IPV4 ? "inet" : \
			 (f) == NFPROTO_IPV6 ? "inet6" : "any")
188
189/* Register a set type structure. The type is identified by
190 * the unique triple of name, family and revision.
191 */
192int
193ip_set_type_register(struct ip_set_type *type)
194{
195	int ret = 0;
196
197	if (type->protocol != IPSET_PROTOCOL) {
198		pr_warn("ip_set type %s, family %s, revision %u:%u uses wrong protocol version %u (want %u)\n",
199			type->name, family_name(type->family),
200			type->revision_min, type->revision_max,
201			type->protocol, IPSET_PROTOCOL);
202		return -EINVAL;
203	}
204
205	ip_set_type_lock();
206	if (find_set_type(type->name, type->family, type->revision_min)) {
207		/* Duplicate! */
208		pr_warn("ip_set type %s, family %s with revision min %u already registered!\n",
209			type->name, family_name(type->family),
210			type->revision_min);
211		ret = -EINVAL;
212		goto unlock;
213	}
214	list_add_rcu(&type->list, &ip_set_type_list);
215	pr_debug("type %s, family %s, revision %u:%u registered.\n",
216		 type->name, family_name(type->family),
217		 type->revision_min, type->revision_max);
218unlock:
219	ip_set_type_unlock();
220	return ret;
221}
222EXPORT_SYMBOL_GPL(ip_set_type_register);
223
224/* Unregister a set type. There's a small race with ip_set_create */
225void
226ip_set_type_unregister(struct ip_set_type *type)
227{
228	ip_set_type_lock();
229	if (!find_set_type(type->name, type->family, type->revision_min)) {
230		pr_warn("ip_set type %s, family %s with revision min %u not registered\n",
231			type->name, family_name(type->family),
232			type->revision_min);
233		goto unlock;
234	}
235	list_del_rcu(&type->list);
236	pr_debug("type %s, family %s with revision min %u unregistered.\n",
237		 type->name, family_name(type->family), type->revision_min);
238unlock:
239	ip_set_type_unlock();
240
241	synchronize_rcu();
242}
243EXPORT_SYMBOL_GPL(ip_set_type_unregister);
244
245/* Utility functions */
246void *
247ip_set_alloc(size_t size)
248{
249	void *members = NULL;
250
251	if (size < KMALLOC_MAX_SIZE)
252		members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
253
254	if (members) {
255		pr_debug("%p: allocated with kmalloc\n", members);
256		return members;
257	}
258
259	members = vzalloc(size);
260	if (!members)
261		return NULL;
262	pr_debug("%p: allocated with vmalloc\n", members);
263
264	return members;
265}
266EXPORT_SYMBOL_GPL(ip_set_alloc);
267
/* Release memory with kvfree(); the debug message reports whether the
 * address is a vmalloc address.
 */
void ip_set_free(void *members)
{
	pr_debug("%p: free with %s\n",
		 members, is_vmalloc_addr(members) ? "vfree" : "kfree");

	kvfree(members);
}
EXPORT_SYMBOL_GPL(ip_set_free);
276
277static inline bool
278flag_nested(const struct nlattr *nla)
279{
280	return nla->nla_type & NLA_F_NESTED;
281}
282
/* Netlink policy for the attributes nested inside an IP address attribute:
 * an IPv4 address is a u32, an IPv6 address is binary data limited to
 * sizeof(struct in6_addr).
 */
static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
	[IPSET_ATTR_IPADDR_IPV4]	= { .type = NLA_U32 },
	[IPSET_ATTR_IPADDR_IPV6]	= { .type = NLA_BINARY,
					    .len = sizeof(struct in6_addr) },
};
288
289int
290ip_set_get_ipaddr4(struct nlattr *nla,  __be32 *ipaddr)
291{
292	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
293
294	if (unlikely(!flag_nested(nla)))
295		return -IPSET_ERR_PROTOCOL;
296	if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
297		return -IPSET_ERR_PROTOCOL;
298	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
299		return -IPSET_ERR_PROTOCOL;
300
301	*ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
302	return 0;
303}
304EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
305
306int
307ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
308{
309	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
310
311	if (unlikely(!flag_nested(nla)))
312		return -IPSET_ERR_PROTOCOL;
313
314	if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
315		return -IPSET_ERR_PROTOCOL;
316	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
317		return -IPSET_ERR_PROTOCOL;
318
319	memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
320		sizeof(struct in6_addr));
321	return 0;
322}
323EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
324
/* Signature the .destroy member is cast to below */
typedef void (*destroyer)(void *);
/* ipset data extension types, in size order */

/* One descriptor per extension, indexed by IPSET_EXT_ID_*:
 *   .type    - IPSET_EXT_* bit(s) or-ed into set->extensions
 *   .flag    - create flag that requests the extension; the timeout entry
 *		has none and is selected by IPSET_ATTR_TIMEOUT instead
 *		(see add_extension())
 *   .len     - size of the extension data
 *   .align   - alignment of the extension data
 *   .destroy - only set for the comment extension
 */
const struct ip_set_ext_type ip_set_extensions[] = {
	[IPSET_EXT_ID_COUNTER] = {
		.type	= IPSET_EXT_COUNTER,
		.flag	= IPSET_FLAG_WITH_COUNTERS,
		.len	= sizeof(struct ip_set_counter),
		.align	= __alignof__(struct ip_set_counter),
	},
	[IPSET_EXT_ID_TIMEOUT] = {
		.type	= IPSET_EXT_TIMEOUT,
		.len	= sizeof(unsigned long),
		.align	= __alignof__(unsigned long),
	},
	[IPSET_EXT_ID_SKBINFO] = {
		.type	= IPSET_EXT_SKBINFO,
		.flag	= IPSET_FLAG_WITH_SKBINFO,
		.len	= sizeof(struct ip_set_skbinfo),
		.align	= __alignof__(struct ip_set_skbinfo),
	},
	[IPSET_EXT_ID_COMMENT] = {
		.type	 = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY,
		.flag	 = IPSET_FLAG_WITH_COMMENT,
		.len	 = sizeof(struct ip_set_comment),
		.align	 = __alignof__(struct ip_set_comment),
		.destroy = (destroyer) ip_set_comment_free,
	},
};
EXPORT_SYMBOL_GPL(ip_set_extensions);
355
356static inline bool
357add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
358{
359	return ip_set_extensions[id].flag ?
360		(flags & ip_set_extensions[id].flag) :
361		!!tb[IPSET_ATTR_TIMEOUT];
362}
363
364size_t
365ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
366{
367	enum ip_set_ext_id id;
368	size_t offset = 0;
369	u32 cadt_flags = 0;
370
371	if (tb[IPSET_ATTR_CADT_FLAGS])
372		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
373	if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
374		set->flags |= IPSET_CREATE_FLAG_FORCEADD;
375	for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
376		if (!add_extension(id, cadt_flags, tb))
377			continue;
378		offset += ALIGN(len + offset, ip_set_extensions[id].align);
379		set->offset[id] = offset;
380		set->extensions |= ip_set_extensions[id].type;
381		offset += ip_set_extensions[id].len;
382	}
383	return len + offset;
384}
385EXPORT_SYMBOL_GPL(ip_set_elem_len);
386
387int
388ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
389		      struct ip_set_ext *ext)
390{
391	u64 fullmark;
392	if (tb[IPSET_ATTR_TIMEOUT]) {
393		if (!(set->extensions & IPSET_EXT_TIMEOUT))
394			return -IPSET_ERR_TIMEOUT;
395		ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
396	}
397	if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) {
398		if (!(set->extensions & IPSET_EXT_COUNTER))
399			return -IPSET_ERR_COUNTER;
400		if (tb[IPSET_ATTR_BYTES])
401			ext->bytes = be64_to_cpu(nla_get_be64(
402						 tb[IPSET_ATTR_BYTES]));
403		if (tb[IPSET_ATTR_PACKETS])
404			ext->packets = be64_to_cpu(nla_get_be64(
405						   tb[IPSET_ATTR_PACKETS]));
406	}
407	if (tb[IPSET_ATTR_COMMENT]) {
408		if (!(set->extensions & IPSET_EXT_COMMENT))
409			return -IPSET_ERR_COMMENT;
410		ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
411	}
412	if (tb[IPSET_ATTR_SKBMARK]) {
413		if (!(set->extensions & IPSET_EXT_SKBINFO))
414			return -IPSET_ERR_SKBINFO;
415		fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
416		ext->skbmark = fullmark >> 32;
417		ext->skbmarkmask = fullmark & 0xffffffff;
418	}
419	if (tb[IPSET_ATTR_SKBPRIO]) {
420		if (!(set->extensions & IPSET_EXT_SKBINFO))
421			return -IPSET_ERR_SKBINFO;
422		ext->skbprio = be32_to_cpu(nla_get_be32(
423					    tb[IPSET_ATTR_SKBPRIO]));
424	}
425	if (tb[IPSET_ATTR_SKBQUEUE]) {
426		if (!(set->extensions & IPSET_EXT_SKBINFO))
427			return -IPSET_ERR_SKBINFO;
428		ext->skbqueue = be16_to_cpu(nla_get_be16(
429					    tb[IPSET_ATTR_SKBQUEUE]));
430	}
431	return 0;
432}
433EXPORT_SYMBOL_GPL(ip_set_get_extensions);
434
435/*
436 * Creating/destroying/renaming/swapping affect the existence and
437 * the properties of a set. All of these can be executed from userspace
438 * only and serialized by the nfnl mutex indirectly from nfnetlink.
439 *
440 * Sets are identified by their index in ip_set_list and the index
441 * is used by the external references (set/SET netfilter modules).
442 *
443 * The set behind an index may change by swapping only, from userspace.
444 */
445
446static inline void
447__ip_set_get(struct ip_set *set)
448{
449	write_lock_bh(&ip_set_ref_lock);
450	set->ref++;
451	write_unlock_bh(&ip_set_ref_lock);
452}
453
454static inline void
455__ip_set_put(struct ip_set *set)
456{
457	write_lock_bh(&ip_set_ref_lock);
458	BUG_ON(set->ref == 0);
459	set->ref--;
460	write_unlock_bh(&ip_set_ref_lock);
461}
462
/*
 * Add, del and test set entries from kernel.
 *
 * The set behind the index must exist and must be referenced
 * so it can't be destroyed (or changed) under our feet.
 */
469
470static inline struct ip_set *
471ip_set_rcu_get(struct net *net, ip_set_id_t index)
472{
473	struct ip_set *set;
474	struct ip_set_net *inst = ip_set_pernet(net);
475
476	rcu_read_lock();
477	/* ip_set_list itself needs to be protected */
478	set = rcu_dereference(inst->ip_set_list)[index];
479	rcu_read_unlock();
480
481	return set;
482}
483
484int
485ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
486	    const struct xt_action_param *par, struct ip_set_adt_opt *opt)
487{
488	struct ip_set *set = ip_set_rcu_get(
489			dev_net(par->in ? par->in : par->out), index);
490	int ret = 0;
491
492	BUG_ON(set == NULL);
493	pr_debug("set %s, index %u\n", set->name, index);
494
495	if (opt->dim < set->type->dimension ||
496	    !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
497		return 0;
498
499	read_lock_bh(&set->lock);
500	ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
501	read_unlock_bh(&set->lock);
502
503	if (ret == -EAGAIN) {
504		/* Type requests element to be completed */
505		pr_debug("element must be completed, ADD is triggered\n");
506		write_lock_bh(&set->lock);
507		set->variant->kadt(set, skb, par, IPSET_ADD, opt);
508		write_unlock_bh(&set->lock);
509		ret = 1;
510	} else {
511		/* --return-nomatch: invert matched element */
512		if ((opt->cmdflags & IPSET_FLAG_RETURN_NOMATCH) &&
513		    (set->type->features & IPSET_TYPE_NOMATCH) &&
514		    (ret > 0 || ret == -ENOTEMPTY))
515			ret = -ret;
516	}
517
518	/* Convert error codes to nomatch */
519	return (ret < 0 ? 0 : ret);
520}
521EXPORT_SYMBOL_GPL(ip_set_test);
522
523int
524ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
525	   const struct xt_action_param *par, struct ip_set_adt_opt *opt)
526{
527	struct ip_set *set = ip_set_rcu_get(
528			dev_net(par->in ? par->in : par->out), index);
529	int ret;
530
531	BUG_ON(set == NULL);
532	pr_debug("set %s, index %u\n", set->name, index);
533
534	if (opt->dim < set->type->dimension ||
535	    !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
536		return -IPSET_ERR_TYPE_MISMATCH;
537
538	write_lock_bh(&set->lock);
539	ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
540	write_unlock_bh(&set->lock);
541
542	return ret;
543}
544EXPORT_SYMBOL_GPL(ip_set_add);
545
546int
547ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
548	   const struct xt_action_param *par, struct ip_set_adt_opt *opt)
549{
550	struct ip_set *set = ip_set_rcu_get(
551			dev_net(par->in ? par->in : par->out), index);
552	int ret = 0;
553
554	BUG_ON(set == NULL);
555	pr_debug("set %s, index %u\n", set->name, index);
556
557	if (opt->dim < set->type->dimension ||
558	    !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
559		return -IPSET_ERR_TYPE_MISMATCH;
560
561	write_lock_bh(&set->lock);
562	ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
563	write_unlock_bh(&set->lock);
564
565	return ret;
566}
567EXPORT_SYMBOL_GPL(ip_set_del);
568
569/*
570 * Find set by name, reference it once. The reference makes sure the
571 * thing pointed to, does not go away under our feet.
572 *
573 */
574ip_set_id_t
575ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
576{
577	ip_set_id_t i, index = IPSET_INVALID_ID;
578	struct ip_set *s;
579	struct ip_set_net *inst = ip_set_pernet(net);
580
581	rcu_read_lock();
582	for (i = 0; i < inst->ip_set_max; i++) {
583		s = rcu_dereference(inst->ip_set_list)[i];
584		if (s != NULL && STREQ(s->name, name)) {
585			__ip_set_get(s);
586			index = i;
587			*set = s;
588			break;
589		}
590	}
591	rcu_read_unlock();
592
593	return index;
594}
595EXPORT_SYMBOL_GPL(ip_set_get_byname);
596
/*
 * If a set exists behind the given index, decrement its
 * reference count by 1. The caller shall not assume the index
 * to be valid after calling this function.
 *
 */
603
604static inline void
605__ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
606{
607	struct ip_set *set;
608
609	rcu_read_lock();
610	set = rcu_dereference(inst->ip_set_list)[index];
611	if (set != NULL)
612		__ip_set_put(set);
613	rcu_read_unlock();
614}
615
616void
617ip_set_put_byindex(struct net *net, ip_set_id_t index)
618{
619	struct ip_set_net *inst = ip_set_pernet(net);
620
621	__ip_set_put_byindex(inst, index);
622}
623EXPORT_SYMBOL_GPL(ip_set_put_byindex);
624
625/*
626 * Get the name of a set behind a set index.
627 * We assume the set is referenced, so it does exist and
628 * can't be destroyed. The set cannot be renamed due to
629 * the referencing either.
630 *
631 */
632const char *
633ip_set_name_byindex(struct net *net, ip_set_id_t index)
634{
635	const struct ip_set *set = ip_set_rcu_get(net, index);
636
637	BUG_ON(set == NULL);
638	BUG_ON(set->ref == 0);
639
640	/* Referenced, so it's safe */
641	return set->name;
642}
643EXPORT_SYMBOL_GPL(ip_set_name_byindex);
644
645/*
646 * Routines to call by external subsystems, which do not
647 * call nfnl_lock for us.
648 */
649
650/*
651 * Find set by index, reference it once. The reference makes sure the
652 * thing pointed to, does not go away under our feet.
653 *
654 * The nfnl mutex is used in the function.
655 */
656ip_set_id_t
657ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
658{
659	struct ip_set *set;
660	struct ip_set_net *inst = ip_set_pernet(net);
661
662	if (index >= inst->ip_set_max)
663		return IPSET_INVALID_ID;
664
665	nfnl_lock(NFNL_SUBSYS_IPSET);
666	set = ip_set(inst, index);
667	if (set)
668		__ip_set_get(set);
669	else
670		index = IPSET_INVALID_ID;
671	nfnl_unlock(NFNL_SUBSYS_IPSET);
672
673	return index;
674}
675EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
676
677/*
678 * If the given set pointer points to a valid set, decrement
679 * reference count by 1. The caller shall not assume the index
680 * to be valid, after calling this function.
681 *
682 * The nfnl mutex is used in the function.
683 */
684void
685ip_set_nfnl_put(struct net *net, ip_set_id_t index)
686{
687	struct ip_set *set;
688	struct ip_set_net *inst = ip_set_pernet(net);
689
690	nfnl_lock(NFNL_SUBSYS_IPSET);
691	if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
692		set = ip_set(inst, index);
693		if (set != NULL)
694			__ip_set_put(set);
695	}
696	nfnl_unlock(NFNL_SUBSYS_IPSET);
697}
698EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
699
700/*
701 * Communication protocol with userspace over netlink.
702 *
703 * The commands are serialized by the nfnl mutex.
704 */
705
706static inline bool
707protocol_failed(const struct nlattr * const tb[])
708{
709	return !tb[IPSET_ATTR_PROTOCOL] ||
710	       nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
711}
712
713static inline u32
714flag_exist(const struct nlmsghdr *nlh)
715{
716	return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
717}
718
719static struct nlmsghdr *
720start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
721	  enum ipset_cmd cmd)
722{
723	struct nlmsghdr *nlh;
724	struct nfgenmsg *nfmsg;
725
726	nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
727			sizeof(*nfmsg), flags);
728	if (nlh == NULL)
729		return NULL;
730
731	nfmsg = nlmsg_data(nlh);
732	nfmsg->nfgen_family = NFPROTO_IPV4;
733	nfmsg->version = NFNETLINK_V0;
734	nfmsg->res_id = 0;
735
736	return nlh;
737}
738
739/* Create a set */
740
/* Netlink attribute policy of the create command. Set and type names are
 * NUL-terminated strings of at most IPSET_MAXNAMELEN - 1 characters; the
 * type specific options travel in the nested IPSET_ATTR_DATA attribute.
 */
static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
	[IPSET_ATTR_TYPENAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1},
	[IPSET_ATTR_REVISION]	= { .type = NLA_U8 },
	[IPSET_ATTR_FAMILY]	= { .type = NLA_U8 },
	[IPSET_ATTR_DATA]	= { .type = NLA_NESTED },
};
751
752static struct ip_set *
753find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
754{
755	struct ip_set *set = NULL;
756	ip_set_id_t i;
757
758	*id = IPSET_INVALID_ID;
759	for (i = 0; i < inst->ip_set_max; i++) {
760		set = ip_set(inst, i);
761		if (set != NULL && STREQ(set->name, name)) {
762			*id = i;
763			break;
764		}
765	}
766	return (*id == IPSET_INVALID_ID ? NULL : set);
767}
768
769static inline struct ip_set *
770find_set(struct ip_set_net *inst, const char *name)
771{
772	ip_set_id_t id;
773
774	return find_set_and_id(inst, name, &id);
775}
776
777static int
778find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
779	     struct ip_set **set)
780{
781	struct ip_set *s;
782	ip_set_id_t i;
783
784	*index = IPSET_INVALID_ID;
785	for (i = 0;  i < inst->ip_set_max; i++) {
786		s = ip_set(inst, i);
787		if (s == NULL) {
788			if (*index == IPSET_INVALID_ID)
789				*index = i;
790		} else if (STREQ(name, s->name)) {
791			/* Name clash */
792			*set = s;
793			return -EEXIST;
794		}
795	}
796	if (*index == IPSET_INVALID_ID)
797		/* No free slot remained */
798		return -IPSET_ERR_MAX_SETS;
799	return 0;
800}
801
802static int
803ip_set_none(struct sock *ctnl, struct sk_buff *skb,
804	    const struct nlmsghdr *nlh,
805	    const struct nlattr * const attr[])
806{
807	return -EOPNOTSUPP;
808}
809
/* Netlink handler of the create command.
 *
 * Validates the mandatory attributes, builds a new struct ip_set, pins the
 * module of the requested set type, lets the type create its private part
 * and finally stores the set in a free slot of the per-net set array,
 * growing the array by IP_SET_INC slots when it is full.
 *
 * Returns 0 on success, and also when a set with the same name exists,
 * the message was sent without NLM_F_EXCL and the existing set has the
 * same type and parameters; in that case the freshly built set is thrown
 * away. Otherwise a negative error is returned: -IPSET_ERR_PROTOCOL for
 * malformed messages, -ENOMEM when the base structure cannot be allocated,
 * the error of the type lookup or of the type's create(), -EEXIST on a
 * name clash, -IPSET_ERR_MAX_SETS when the array cannot grow.
 */
static int
ip_set_create(struct sock *ctnl, struct sk_buff *skb,
	      const struct nlmsghdr *nlh,
	      const struct nlattr * const attr[])
{
	struct net *net = sock_net(ctnl);
	struct ip_set_net *inst = ip_set_pernet(net);
	struct ip_set *set, *clash = NULL;
	ip_set_id_t index = IPSET_INVALID_ID;
	/* Zero initialized: stays all NULL when IPSET_ATTR_DATA is absent */
	struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
	const char *name, *typename;
	u8 family, revision;
	u32 flags = flag_exist(nlh);
	int ret = 0;

	if (unlikely(protocol_failed(attr) ||
		     attr[IPSET_ATTR_SETNAME] == NULL ||
		     attr[IPSET_ATTR_TYPENAME] == NULL ||
		     attr[IPSET_ATTR_REVISION] == NULL ||
		     attr[IPSET_ATTR_FAMILY] == NULL ||
		     (attr[IPSET_ATTR_DATA] != NULL &&
		      !flag_nested(attr[IPSET_ATTR_DATA]))))
		return -IPSET_ERR_PROTOCOL;

	name = nla_data(attr[IPSET_ATTR_SETNAME]);
	typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
	family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
	revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
	pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
		 name, typename, family_name(family), revision);

	/*
	 * First, and without any locks, allocate and initialize
	 * a normal base set structure.
	 */
	set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
	if (!set)
		return -ENOMEM;
	rwlock_init(&set->lock);
	strlcpy(set->name, name, IPSET_MAXNAMELEN);
	set->family = family;
	set->revision = revision;

	/*
	 * Next, check that we know the type, and take
	 * a reference on the type, to make sure it stays available
	 * while constructing our new set.
	 *
	 * After referencing the type, we try to create the type
	 * specific part of the set without holding any locks.
	 */
	ret = find_set_type_get(typename, family, revision, &(set->type));
	if (ret)
		goto out;

	/*
	 * Without holding any locks, create private part.
	 */
	if (attr[IPSET_ATTR_DATA] &&
	    nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
			     set->type->create_policy)) {
		ret = -IPSET_ERR_PROTOCOL;
		goto put_out;
	}

	ret = set->type->create(net, set, tb, flags);
	if (ret != 0)
		goto put_out;

	/* BTW, ret==0 here. */

	/*
	 * Here, we have a valid, constructed set and we are protected
	 * by the nfnl mutex. Find the first free index in ip_set_list
	 * and check clashing.
	 */
	ret = find_free_id(inst, set->name, &index, &clash);
	if (ret == -EEXIST) {
		/* If this is the same set and requested, ignore error */
		if ((flags & IPSET_FLAG_EXIST) &&
		    STREQ(set->type->name, clash->type->name) &&
		    set->type->family == clash->type->family &&
		    set->type->revision_min == clash->type->revision_min &&
		    set->type->revision_max == clash->type->revision_max &&
		    set->variant->same_set(set, clash))
			ret = 0;
		/* Either way the new set is not needed: destroy it */
		goto cleanup;
	} else if (ret == -IPSET_ERR_MAX_SETS) {
		struct ip_set **list, **tmp;
		ip_set_id_t i = inst->ip_set_max + IP_SET_INC;

		if (i < inst->ip_set_max || i == IPSET_INVALID_ID)
			/* Wraparound */
			goto cleanup;

		list = kzalloc(sizeof(struct ip_set *) * i, GFP_KERNEL);
		/* NOTE(review): on allocation failure ret is still
		 * -IPSET_ERR_MAX_SETS, not -ENOMEM - confirm intended.
		 */
		if (!list)
			goto cleanup;
		/* nfnl mutex is held, both lists are valid */
		tmp = ip_set_dereference(inst->ip_set_list);
		memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max);
		rcu_assign_pointer(inst->ip_set_list, list);
		/* Make sure all current packets have passed through */
		synchronize_net();
		/* Use new list */
		/* The first slot past the old array is free by construction */
		index = inst->ip_set_max;
		inst->ip_set_max = i;
		kfree(tmp);
		ret = 0;
	} else if (ret)
		goto cleanup;

	/*
	 * Finally! Add our shiny new set to the list, and be done.
	 */
	pr_debug("create: '%s' created with index %u!\n", set->name, index);
	ip_set(inst, index) = set;

	return ret;

	/* Unwind in reverse order of construction */
cleanup:
	set->variant->destroy(set);
put_out:
	module_put(set->type->me);
out:
	kfree(set);
	return ret;
}
938
939/* Destroy sets */
940
/* Netlink attribute policy for commands that take a protocol version and
 * one set name (a NUL-terminated string of at most IPSET_MAXNAMELEN - 1
 * characters).
 */
static const struct nla_policy
ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
};
947
948static void
949ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index)
950{
951	struct ip_set *set = ip_set(inst, index);
952
953	pr_debug("set: %s\n",  set->name);
954	ip_set(inst, index) = NULL;
955
956	/* Must call it without holding any lock */
957	set->variant->destroy(set);
958	module_put(set->type->me);
959	kfree(set);
960}
961
962static int
963ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
964	       const struct nlmsghdr *nlh,
965	       const struct nlattr * const attr[])
966{
967	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
968	struct ip_set *s;
969	ip_set_id_t i;
970	int ret = 0;
971
972	if (unlikely(protocol_failed(attr)))
973		return -IPSET_ERR_PROTOCOL;
974
975	/* Commands are serialized and references are
976	 * protected by the ip_set_ref_lock.
977	 * External systems (i.e. xt_set) must call
978	 * ip_set_put|get_nfnl_* functions, that way we
979	 * can safely check references here.
980	 *
981	 * list:set timer can only decrement the reference
982	 * counter, so if it's already zero, we can proceed
983	 * without holding the lock.
984	 */
985	read_lock_bh(&ip_set_ref_lock);
986	if (!attr[IPSET_ATTR_SETNAME]) {
987		for (i = 0; i < inst->ip_set_max; i++) {
988			s = ip_set(inst, i);
989			if (s != NULL && s->ref) {
990				ret = -IPSET_ERR_BUSY;
991				goto out;
992			}
993		}
994		read_unlock_bh(&ip_set_ref_lock);
995		for (i = 0; i < inst->ip_set_max; i++) {
996			s = ip_set(inst, i);
997			if (s != NULL)
998				ip_set_destroy_set(inst, i);
999		}
1000	} else {
1001		s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
1002				    &i);
1003		if (s == NULL) {
1004			ret = -ENOENT;
1005			goto out;
1006		} else if (s->ref) {
1007			ret = -IPSET_ERR_BUSY;
1008			goto out;
1009		}
1010		read_unlock_bh(&ip_set_ref_lock);
1011
1012		ip_set_destroy_set(inst, i);
1013	}
1014	return 0;
1015out:
1016	read_unlock_bh(&ip_set_ref_lock);
1017	return ret;
1018}
1019
1020/* Flush sets */
1021
1022static void
1023ip_set_flush_set(struct ip_set *set)
1024{
1025	pr_debug("set: %s\n",  set->name);
1026
1027	write_lock_bh(&set->lock);
1028	set->variant->flush(set);
1029	write_unlock_bh(&set->lock);
1030}
1031
1032static int
1033ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
1034	     const struct nlmsghdr *nlh,
1035	     const struct nlattr * const attr[])
1036{
1037	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
1038	struct ip_set *s;
1039	ip_set_id_t i;
1040
1041	if (unlikely(protocol_failed(attr)))
1042		return -IPSET_ERR_PROTOCOL;
1043
1044	if (!attr[IPSET_ATTR_SETNAME]) {
1045		for (i = 0; i < inst->ip_set_max; i++) {
1046			s = ip_set(inst, i);
1047			if (s != NULL)
1048				ip_set_flush_set(s);
1049		}
1050	} else {
1051		s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1052		if (s == NULL)
1053			return -ENOENT;
1054
1055		ip_set_flush_set(s);
1056	}
1057
1058	return 0;
1059}
1060
1061/* Rename a set */
1062
/* Netlink attribute policy for commands that take a protocol version and
 * two set names (rename and swap both read IPSET_ATTR_SETNAME and
 * IPSET_ATTR_SETNAME2). Each name is a NUL-terminated string of at most
 * IPSET_MAXNAMELEN - 1 characters.
 */
static const struct nla_policy
ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
	[IPSET_ATTR_SETNAME2]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
};
1071
1072static int
1073ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
1074	      const struct nlmsghdr *nlh,
1075	      const struct nlattr * const attr[])
1076{
1077	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
1078	struct ip_set *set, *s;
1079	const char *name2;
1080	ip_set_id_t i;
1081	int ret = 0;
1082
1083	if (unlikely(protocol_failed(attr) ||
1084		     attr[IPSET_ATTR_SETNAME] == NULL ||
1085		     attr[IPSET_ATTR_SETNAME2] == NULL))
1086		return -IPSET_ERR_PROTOCOL;
1087
1088	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1089	if (set == NULL)
1090		return -ENOENT;
1091
1092	read_lock_bh(&ip_set_ref_lock);
1093	if (set->ref != 0) {
1094		ret = -IPSET_ERR_REFERENCED;
1095		goto out;
1096	}
1097
1098	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
1099	for (i = 0; i < inst->ip_set_max; i++) {
1100		s = ip_set(inst, i);
1101		if (s != NULL && STREQ(s->name, name2)) {
1102			ret = -IPSET_ERR_EXIST_SETNAME2;
1103			goto out;
1104		}
1105	}
1106	strncpy(set->name, name2, IPSET_MAXNAMELEN);
1107
1108out:
1109	read_unlock_bh(&ip_set_ref_lock);
1110	return ret;
1111}
1112
1113/* Swap two sets so that name/index points to the other.
1114 * References and set names are also swapped.
1115 *
1116 * The commands are serialized by the nfnl mutex and references are
1117 * protected by the ip_set_ref_lock. The kernel interfaces
1118 * do not hold the mutex but the pointer settings are atomic
1119 * so the ip_set_list always contains valid pointers to the sets.
1120 */
1121
1122static int
1123ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
1124	    const struct nlmsghdr *nlh,
1125	    const struct nlattr * const attr[])
1126{
1127	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
1128	struct ip_set *from, *to;
1129	ip_set_id_t from_id, to_id;
1130	char from_name[IPSET_MAXNAMELEN];
1131
1132	if (unlikely(protocol_failed(attr) ||
1133		     attr[IPSET_ATTR_SETNAME] == NULL ||
1134		     attr[IPSET_ATTR_SETNAME2] == NULL))
1135		return -IPSET_ERR_PROTOCOL;
1136
1137	from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
1138			       &from_id);
1139	if (from == NULL)
1140		return -ENOENT;
1141
1142	to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]),
1143			     &to_id);
1144	if (to == NULL)
1145		return -IPSET_ERR_EXIST_SETNAME2;
1146
1147	/* Features must not change.
1148	 * Not an artificial restriction anymore, as we must prevent
1149	 * possible loops created by swapping in setlist type of sets. */
1150	if (!(from->type->features == to->type->features &&
1151	      from->family == to->family))
1152		return -IPSET_ERR_TYPE_MISMATCH;
1153
1154	strncpy(from_name, from->name, IPSET_MAXNAMELEN);
1155	strncpy(from->name, to->name, IPSET_MAXNAMELEN);
1156	strncpy(to->name, from_name, IPSET_MAXNAMELEN);
1157
1158	write_lock_bh(&ip_set_ref_lock);
1159	swap(from->ref, to->ref);
1160	ip_set(inst, from_id) = to;
1161	ip_set(inst, to_id) = from;
1162	write_unlock_bh(&ip_set_ref_lock);
1163
1164	return 0;
1165}
1166
1167/* List/save set data */
1168
/* Dump state stored in cb->args[IPSET_CB_DUMP]:
 * the low 16 bits hold one of the DUMP_* values, the high 16 bits hold
 * the flags sent with the request.
 */
#define DUMP_INIT	0	/* zero value: dump state not initialized yet */
#define DUMP_ALL	1	/* dumping all sets, first pass */
#define DUMP_ONE	2	/* dumping a single, named set */
#define DUMP_LAST	3	/* second pass: sets flagged IPSET_DUMP_LAST */

/* Extract the dump type (low 16 bits) */
#define DUMP_TYPE(arg)		(((u32)(arg)) & 0x0000FFFF)
/* Extract the request flags (high 16 bits) */
#define DUMP_FLAGS(arg)		(((u32)(arg)) >> 16)
1176
1177static int
1178ip_set_dump_done(struct netlink_callback *cb)
1179{
1180	struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET];
1181	if (cb->args[IPSET_CB_ARG0]) {
1182		pr_debug("release set %s\n",
1183			 ip_set(inst, cb->args[IPSET_CB_INDEX])->name);
1184		__ip_set_put_byindex(inst,
1185			(ip_set_id_t) cb->args[IPSET_CB_INDEX]);
1186	}
1187	return 0;
1188}
1189
1190static inline void
1191dump_attrs(struct nlmsghdr *nlh)
1192{
1193	const struct nlattr *attr;
1194	int rem;
1195
1196	pr_debug("dump nlmsg\n");
1197	nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
1198		pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
1199	}
1200}
1201
1202static int
1203dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
1204{
1205	struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
1206	int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
1207	struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1208	struct nlattr *attr = (void *)nlh + min_len;
1209	u32 dump_type;
1210	ip_set_id_t index;
1211
1212	/* Second pass, so parser can't fail */
1213	nla_parse(cda, IPSET_ATTR_CMD_MAX,
1214		  attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
1215
1216	/* cb->args[IPSET_CB_NET]:	net namespace
1217	 *         [IPSET_CB_DUMP]:	dump single set/all sets
1218	 *         [IPSET_CB_INDEX]: 	set index
1219	 *         [IPSET_CB_ARG0]:	type specific
1220	 */
1221
1222	if (cda[IPSET_ATTR_SETNAME]) {
1223		struct ip_set *set;
1224
1225		set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]),
1226				      &index);
1227		if (set == NULL)
1228			return -ENOENT;
1229
1230		dump_type = DUMP_ONE;
1231		cb->args[IPSET_CB_INDEX] = index;
1232	} else
1233		dump_type = DUMP_ALL;
1234
1235	if (cda[IPSET_ATTR_FLAGS]) {
1236		u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
1237		dump_type |= (f << 16);
1238	}
1239	cb->args[IPSET_CB_NET] = (unsigned long)inst;
1240	cb->args[IPSET_CB_DUMP] = dump_type;
1241
1242	return 0;
1243}
1244
/* Netlink dump callback: fill skb with an IPSET_CMD_LIST message for the
 * set the dump currently stands at.
 *
 * The progress is kept in cb->args between the invocations: the dump
 * type and flags in [IPSET_CB_DUMP], the index of the current set in
 * [IPSET_CB_INDEX] and the type specific listing state in
 * [IPSET_CB_ARG0]. On the first invocation the state is set up by
 * dump_init().
 *
 * Returns a negative error code on failure, skb->len otherwise.
 */
static int
ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
{
	ip_set_id_t index = IPSET_INVALID_ID, max;
	struct ip_set *set = NULL;
	struct nlmsghdr *nlh = NULL;
	unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
	struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
	u32 dump_type, dump_flags;
	int ret = 0;

	/* Zero dump state (DUMP_INIT): first invocation for this dump */
	if (!cb->args[IPSET_CB_DUMP]) {
		ret = dump_init(cb, inst);
		if (ret < 0) {
			nlh = nlmsg_hdr(cb->skb);
			/* We have to create and send the error message
			 * manually :-( */
			if (nlh->nlmsg_flags & NLM_F_ACK)
				netlink_ack(cb->skb, nlh, ret);
			return ret;
		}
	}

	/* Index beyond the last slot: nothing (more) to dump. The
	 * single set dump lands here too, after next_set stored
	 * IPSET_INVALID_ID as the index.
	 */
	if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max)
		goto out;

	dump_type = DUMP_TYPE(cb->args[IPSET_CB_DUMP]);
	dump_flags = DUMP_FLAGS(cb->args[IPSET_CB_DUMP]);
	/* A single set dump visits just the one index */
	max = dump_type == DUMP_ONE ? cb->args[IPSET_CB_INDEX] + 1
				    : inst->ip_set_max;
dump_last:
	pr_debug("dump type, flag: %u %u index: %ld\n",
		 dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
	for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
		index = (ip_set_id_t) cb->args[IPSET_CB_INDEX];
		set = ip_set(inst, index);
		if (set == NULL) {
			if (dump_type == DUMP_ONE) {
				ret = -ENOENT;
				goto out;
			}
			continue;
		}
		/* When dumping all sets, we must dump "sorted"
		 * so that lists (unions of sets) are dumped last.
		 * That is: the DUMP_ALL pass skips the sets flagged
		 * IPSET_DUMP_LAST, the DUMP_LAST pass skips all the others.
		 */
		if (dump_type != DUMP_ONE &&
		    ((dump_type == DUMP_ALL) ==
		     !!(set->type->features & IPSET_DUMP_LAST)))
			continue;
		pr_debug("List set: %s\n", set->name);
		if (!cb->args[IPSET_CB_ARG0]) {
			/* Start listing: make sure set won't be destroyed */
			pr_debug("reference set\n");
			__ip_set_get(set);
		}
		nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq, flags,
				IPSET_CMD_LIST);
		if (!nlh) {
			ret = -EMSGSIZE;
			goto release_refcount;
		}
		if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
		    nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
			goto nla_put_failure;
		/* Only the set names were requested */
		if (dump_flags & IPSET_FLAG_LIST_SETNAME)
			goto next_set;
		/* Zero listing state: the header has not been sent yet */
		switch (cb->args[IPSET_CB_ARG0]) {
		case 0:
			/* Core header data */
			if (nla_put_string(skb, IPSET_ATTR_TYPENAME,
					   set->type->name) ||
			    nla_put_u8(skb, IPSET_ATTR_FAMILY,
				       set->family) ||
			    nla_put_u8(skb, IPSET_ATTR_REVISION,
				       set->revision))
				goto nla_put_failure;
			ret = set->variant->head(set, skb);
			if (ret < 0)
				goto release_refcount;
			/* Only the headers were requested */
			if (dump_flags & IPSET_FLAG_LIST_HEADER)
				goto next_set;
			/* Fall through and add elements */
		default:
			read_lock_bh(&set->lock);
			ret = set->variant->list(set, skb, cb);
			read_unlock_bh(&set->lock);
			/* A non-zero listing state after the call means the
			 * listing of this set continues in the next message.
			 */
			if (!cb->args[IPSET_CB_ARG0])
				/* Set is done, proceed with next one */
				goto next_set;
			goto release_refcount;
		}
	}
	/* If we dump all sets, continue with dumping last ones */
	if (dump_type == DUMP_ALL) {
		dump_type = DUMP_LAST;
		cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16);
		cb->args[IPSET_CB_INDEX] = 0;
		goto dump_last;
	}
	goto out;

nla_put_failure:
	ret = -EFAULT;
next_set:
	/* Advance the stored index, so that the next invocation proceeds
	 * with the next set or, for a single set dump, stops.
	 */
	if (dump_type == DUMP_ONE)
		cb->args[IPSET_CB_INDEX] = IPSET_INVALID_ID;
	else
		cb->args[IPSET_CB_INDEX]++;
release_refcount:
	/* If there was an error or set is done, release set */
	if (ret || !cb->args[IPSET_CB_ARG0]) {
		pr_debug("release set %s\n", ip_set(inst, index)->name);
		__ip_set_put_byindex(inst, index);
		cb->args[IPSET_CB_ARG0] = 0;
	}
out:
	/* Finalize the message if one was started */
	if (nlh) {
		nlmsg_end(skb, nlh);
		pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
		dump_attrs(nlh);
	}

	return ret < 0 ? ret : skb->len;
}
1371
1372static int
1373ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
1374	    const struct nlmsghdr *nlh,
1375	    const struct nlattr * const attr[])
1376{
1377	if (unlikely(protocol_failed(attr)))
1378		return -IPSET_ERR_PROTOCOL;
1379
1380	{
1381		struct netlink_dump_control c = {
1382			.dump = ip_set_dump_start,
1383			.done = ip_set_dump_done,
1384		};
1385		return netlink_dump_start(ctnl, skb, nlh, &c);
1386	}
1387}
1388
1389/* Add, del and test */
1390
1391static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
1392	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1393	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
1394				    .len = IPSET_MAXNAMELEN - 1 },
1395	[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
1396	[IPSET_ATTR_DATA]	= { .type = NLA_NESTED },
1397	[IPSET_ATTR_ADT]	= { .type = NLA_NESTED },
1398};
1399
1400static int
1401call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
1402	struct nlattr *tb[], enum ipset_adt adt,
1403	u32 flags, bool use_lineno)
1404{
1405	int ret;
1406	u32 lineno = 0;
1407	bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
1408
1409	do {
1410		write_lock_bh(&set->lock);
1411		ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
1412		write_unlock_bh(&set->lock);
1413		retried = true;
1414	} while (ret == -EAGAIN &&
1415		 set->variant->resize &&
1416		 (ret = set->variant->resize(set, retried)) == 0);
1417
1418	if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
1419		return 0;
1420	if (lineno && use_lineno) {
1421		/* Error in restore/batch mode: send back lineno */
1422		struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
1423		struct sk_buff *skb2;
1424		struct nlmsgerr *errmsg;
1425		size_t payload = min(SIZE_MAX,
1426				     sizeof(*errmsg) + nlmsg_len(nlh));
1427		int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
1428		struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1429		struct nlattr *cmdattr;
1430		u32 *errline;
1431
1432		skb2 = nlmsg_new(payload, GFP_KERNEL);
1433		if (skb2 == NULL)
1434			return -ENOMEM;
1435		rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid,
1436				  nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
1437		errmsg = nlmsg_data(rep);
1438		errmsg->error = ret;
1439		memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
1440		cmdattr = (void *)&errmsg->msg + min_len;
1441
1442		nla_parse(cda, IPSET_ATTR_CMD_MAX,
1443			  cmdattr, nlh->nlmsg_len - min_len,
1444			  ip_set_adt_policy);
1445
1446		errline = nla_data(cda[IPSET_ATTR_LINENO]);
1447
1448		*errline = lineno;
1449
1450		netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1451		/* Signal netlink not to send its ACK/errmsg.  */
1452		return -EINTR;
1453	}
1454
1455	return ret;
1456}
1457
1458static int
1459ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
1460	    const struct nlmsghdr *nlh,
1461	    const struct nlattr * const attr[])
1462{
1463	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
1464	struct ip_set *set;
1465	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1466	const struct nlattr *nla;
1467	u32 flags = flag_exist(nlh);
1468	bool use_lineno;
1469	int ret = 0;
1470
1471	if (unlikely(protocol_failed(attr) ||
1472		     attr[IPSET_ATTR_SETNAME] == NULL ||
1473		     !((attr[IPSET_ATTR_DATA] != NULL) ^
1474		       (attr[IPSET_ATTR_ADT] != NULL)) ||
1475		     (attr[IPSET_ATTR_DATA] != NULL &&
1476		      !flag_nested(attr[IPSET_ATTR_DATA])) ||
1477		     (attr[IPSET_ATTR_ADT] != NULL &&
1478		      (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1479		       attr[IPSET_ATTR_LINENO] == NULL))))
1480		return -IPSET_ERR_PROTOCOL;
1481
1482	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1483	if (set == NULL)
1484		return -ENOENT;
1485
1486	use_lineno = !!attr[IPSET_ATTR_LINENO];
1487	if (attr[IPSET_ATTR_DATA]) {
1488		if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1489				     attr[IPSET_ATTR_DATA],
1490				     set->type->adt_policy))
1491			return -IPSET_ERR_PROTOCOL;
1492		ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
1493			      use_lineno);
1494	} else {
1495		int nla_rem;
1496
1497		nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1498			memset(tb, 0, sizeof(tb));
1499			if (nla_type(nla) != IPSET_ATTR_DATA ||
1500			    !flag_nested(nla) ||
1501			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1502					     set->type->adt_policy))
1503				return -IPSET_ERR_PROTOCOL;
1504			ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
1505				      flags, use_lineno);
1506			if (ret < 0)
1507				return ret;
1508		}
1509	}
1510	return ret;
1511}
1512
1513static int
1514ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
1515	    const struct nlmsghdr *nlh,
1516	    const struct nlattr * const attr[])
1517{
1518	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
1519	struct ip_set *set;
1520	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1521	const struct nlattr *nla;
1522	u32 flags = flag_exist(nlh);
1523	bool use_lineno;
1524	int ret = 0;
1525
1526	if (unlikely(protocol_failed(attr) ||
1527		     attr[IPSET_ATTR_SETNAME] == NULL ||
1528		     !((attr[IPSET_ATTR_DATA] != NULL) ^
1529		       (attr[IPSET_ATTR_ADT] != NULL)) ||
1530		     (attr[IPSET_ATTR_DATA] != NULL &&
1531		      !flag_nested(attr[IPSET_ATTR_DATA])) ||
1532		     (attr[IPSET_ATTR_ADT] != NULL &&
1533		      (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1534		       attr[IPSET_ATTR_LINENO] == NULL))))
1535		return -IPSET_ERR_PROTOCOL;
1536
1537	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1538	if (set == NULL)
1539		return -ENOENT;
1540
1541	use_lineno = !!attr[IPSET_ATTR_LINENO];
1542	if (attr[IPSET_ATTR_DATA]) {
1543		if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1544				     attr[IPSET_ATTR_DATA],
1545				     set->type->adt_policy))
1546			return -IPSET_ERR_PROTOCOL;
1547		ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
1548			      use_lineno);
1549	} else {
1550		int nla_rem;
1551
1552		nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1553			memset(tb, 0, sizeof(*tb));
1554			if (nla_type(nla) != IPSET_ATTR_DATA ||
1555			    !flag_nested(nla) ||
1556			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1557					     set->type->adt_policy))
1558				return -IPSET_ERR_PROTOCOL;
1559			ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
1560				      flags, use_lineno);
1561			if (ret < 0)
1562				return ret;
1563		}
1564	}
1565	return ret;
1566}
1567
1568static int
1569ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
1570	     const struct nlmsghdr *nlh,
1571	     const struct nlattr * const attr[])
1572{
1573	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
1574	struct ip_set *set;
1575	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1576	int ret = 0;
1577
1578	if (unlikely(protocol_failed(attr) ||
1579		     attr[IPSET_ATTR_SETNAME] == NULL ||
1580		     attr[IPSET_ATTR_DATA] == NULL ||
1581		     !flag_nested(attr[IPSET_ATTR_DATA])))
1582		return -IPSET_ERR_PROTOCOL;
1583
1584	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1585	if (set == NULL)
1586		return -ENOENT;
1587
1588	if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
1589			     set->type->adt_policy))
1590		return -IPSET_ERR_PROTOCOL;
1591
1592	read_lock_bh(&set->lock);
1593	ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
1594	read_unlock_bh(&set->lock);
1595	/* Userspace can't trigger element to be re-added */
1596	if (ret == -EAGAIN)
1597		ret = 1;
1598
1599	return ret > 0 ? 0 : -IPSET_ERR_EXIST;
1600}
1601
/* Get header data of a set */
1603
1604static int
1605ip_set_header(struct sock *ctnl, struct sk_buff *skb,
1606	      const struct nlmsghdr *nlh,
1607	      const struct nlattr * const attr[])
1608{
1609	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
1610	const struct ip_set *set;
1611	struct sk_buff *skb2;
1612	struct nlmsghdr *nlh2;
1613	int ret = 0;
1614
1615	if (unlikely(protocol_failed(attr) ||
1616		     attr[IPSET_ATTR_SETNAME] == NULL))
1617		return -IPSET_ERR_PROTOCOL;
1618
1619	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
1620	if (set == NULL)
1621		return -ENOENT;
1622
1623	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1624	if (skb2 == NULL)
1625		return -ENOMEM;
1626
1627	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1628			 IPSET_CMD_HEADER);
1629	if (!nlh2)
1630		goto nlmsg_failure;
1631	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1632	    nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) ||
1633	    nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) ||
1634	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
1635	    nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision))
1636		goto nla_put_failure;
1637	nlmsg_end(skb2, nlh2);
1638
1639	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1640	if (ret < 0)
1641		return ret;
1642
1643	return 0;
1644
1645nla_put_failure:
1646	nlmsg_cancel(skb2, nlh2);
1647nlmsg_failure:
1648	kfree_skb(skb2);
1649	return -EMSGSIZE;
1650}
1651
1652/* Get type data */
1653
1654static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
1655	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1656	[IPSET_ATTR_TYPENAME]	= { .type = NLA_NUL_STRING,
1657				    .len = IPSET_MAXNAMELEN - 1 },
1658	[IPSET_ATTR_FAMILY]	= { .type = NLA_U8 },
1659};
1660
1661static int
1662ip_set_type(struct sock *ctnl, struct sk_buff *skb,
1663	    const struct nlmsghdr *nlh,
1664	    const struct nlattr * const attr[])
1665{
1666	struct sk_buff *skb2;
1667	struct nlmsghdr *nlh2;
1668	u8 family, min, max;
1669	const char *typename;
1670	int ret = 0;
1671
1672	if (unlikely(protocol_failed(attr) ||
1673		     attr[IPSET_ATTR_TYPENAME] == NULL ||
1674		     attr[IPSET_ATTR_FAMILY] == NULL))
1675		return -IPSET_ERR_PROTOCOL;
1676
1677	family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
1678	typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
1679	ret = find_set_type_minmax(typename, family, &min, &max);
1680	if (ret)
1681		return ret;
1682
1683	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1684	if (skb2 == NULL)
1685		return -ENOMEM;
1686
1687	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1688			 IPSET_CMD_TYPE);
1689	if (!nlh2)
1690		goto nlmsg_failure;
1691	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1692	    nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) ||
1693	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) ||
1694	    nla_put_u8(skb2, IPSET_ATTR_REVISION, max) ||
1695	    nla_put_u8(skb2, IPSET_ATTR_REVISION_MIN, min))
1696		goto nla_put_failure;
1697	nlmsg_end(skb2, nlh2);
1698
1699	pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
1700	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1701	if (ret < 0)
1702		return ret;
1703
1704	return 0;
1705
1706nla_put_failure:
1707	nlmsg_cancel(skb2, nlh2);
1708nlmsg_failure:
1709	kfree_skb(skb2);
1710	return -EMSGSIZE;
1711}
1712
1713/* Get protocol version */
1714
1715static const struct nla_policy
1716ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
1717	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1718};
1719
1720static int
1721ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
1722		const struct nlmsghdr *nlh,
1723		const struct nlattr * const attr[])
1724{
1725	struct sk_buff *skb2;
1726	struct nlmsghdr *nlh2;
1727	int ret = 0;
1728
1729	if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
1730		return -IPSET_ERR_PROTOCOL;
1731
1732	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1733	if (skb2 == NULL)
1734		return -ENOMEM;
1735
1736	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
1737			 IPSET_CMD_PROTOCOL);
1738	if (!nlh2)
1739		goto nlmsg_failure;
1740	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL))
1741		goto nla_put_failure;
1742	nlmsg_end(skb2, nlh2);
1743
1744	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
1745	if (ret < 0)
1746		return ret;
1747
1748	return 0;
1749
1750nla_put_failure:
1751	nlmsg_cancel(skb2, nlh2);
1752nlmsg_failure:
1753	kfree_skb(skb2);
1754	return -EMSGSIZE;
1755}
1756
/* Dispatch table of the ipset netlink subsystem, indexed by the
 * IPSET_CMD_* command number: the handler of the command, the number of
 * attributes and the policy its attributes are validated against.
 */
static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
	/* No policy is set for this entry */
	[IPSET_CMD_NONE]	= {
		.call		= ip_set_none,
		.attr_count	= IPSET_ATTR_CMD_MAX,
	},
	[IPSET_CMD_CREATE]	= {
		.call		= ip_set_create,
		.attr_count	= IPSET_ATTR_CMD_MAX,
		.policy		= ip_set_create_policy,
	},
	[IPSET_CMD_DESTROY]	= {
		.call		= ip_set_destroy,
		.attr_count	= IPSET_ATTR_CMD_MAX,
		.policy		= ip_set_setname_policy,
	},
	[IPSET_CMD_FLUSH]	= {
		.call		= ip_set_flush,
		.attr_count	= IPSET_ATTR_CMD_MAX,
		.policy		= ip_set_setname_policy,
	},
	/* Rename and swap take two set names */
	[IPSET_CMD_RENAME]	= {
		.call		= ip_set_rename,
		.attr_count	= IPSET_ATTR_CMD_MAX,
		.policy		= ip_set_setname2_policy,
	},
	[IPSET_CMD_SWAP]	= {
		.call		= ip_set_swap,
		.attr_count	= IPSET_ATTR_CMD_MAX,
		.policy		= ip_set_setname2_policy,
	},
	/* List and save share the same dump handler */
	[IPSET_CMD_LIST]	= {
		.call		= ip_set_dump,
		.attr_count	= IPSET_ATTR_CMD_MAX,
		.policy		= ip_set_setname_policy,
	},
	[IPSET_CMD_SAVE]	= {
		.call		= ip_set_dump,
		.attr_count	= IPSET_ATTR_CMD_MAX,
		.policy		= ip_set_setname_policy,
	},
	/* Element operations */
	[IPSET_CMD_ADD]	= {
		.call		= ip_set_uadd,
		.attr_count	= IPSET_ATTR_CMD_MAX,
		.policy		= ip_set_adt_policy,
	},
	[IPSET_CMD_DEL]	= {
		.call		= ip_set_udel,
		.attr_count	= IPSET_ATTR_CMD_MAX,
		.policy		= ip_set_adt_policy,
	},
	[IPSET_CMD_TEST]	= {
		.call		= ip_set_utest,
		.attr_count	= IPSET_ATTR_CMD_MAX,
		.policy		= ip_set_adt_policy,
	},
	/* Queries answered with a unicast reply */
	[IPSET_CMD_HEADER]	= {
		.call		= ip_set_header,
		.attr_count	= IPSET_ATTR_CMD_MAX,
		.policy		= ip_set_setname_policy,
	},
	[IPSET_CMD_TYPE]	= {
		.call		= ip_set_type,
		.attr_count	= IPSET_ATTR_CMD_MAX,
		.policy		= ip_set_type_policy,
	},
	[IPSET_CMD_PROTOCOL]	= {
		.call		= ip_set_protocol,
		.attr_count	= IPSET_ATTR_CMD_MAX,
		.policy		= ip_set_protocol_policy,
	},
};
1828
1829static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
1830	.name		= "ip_set",
1831	.subsys_id	= NFNL_SUBSYS_IPSET,
1832	.cb_count	= IPSET_MSG_MAX,
1833	.cb		= ip_set_netlink_subsys_cb,
1834};
1835
1836/* Interface to iptables/ip6tables */
1837
1838static int
1839ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1840{
1841	unsigned int *op;
1842	void *data;
1843	int copylen = *len, ret = 0;
1844	struct net *net = sock_net(sk);
1845	struct ip_set_net *inst = ip_set_pernet(net);
1846
1847	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1848		return -EPERM;
1849	if (optval != SO_IP_SET)
1850		return -EBADF;
1851	if (*len < sizeof(unsigned int))
1852		return -EINVAL;
1853
1854	data = vmalloc(*len);
1855	if (!data)
1856		return -ENOMEM;
1857	if (copy_from_user(data, user, *len) != 0) {
1858		ret = -EFAULT;
1859		goto done;
1860	}
1861	op = (unsigned int *) data;
1862
1863	if (*op < IP_SET_OP_VERSION) {
1864		/* Check the version at the beginning of operations */
1865		struct ip_set_req_version *req_version = data;
1866
1867		if (*len < sizeof(struct ip_set_req_version)) {
1868			ret = -EINVAL;
1869			goto done;
1870		}
1871
1872		if (req_version->version != IPSET_PROTOCOL) {
1873			ret = -EPROTO;
1874			goto done;
1875		}
1876	}
1877
1878	switch (*op) {
1879	case IP_SET_OP_VERSION: {
1880		struct ip_set_req_version *req_version = data;
1881
1882		if (*len != sizeof(struct ip_set_req_version)) {
1883			ret = -EINVAL;
1884			goto done;
1885		}
1886
1887		req_version->version = IPSET_PROTOCOL;
1888		ret = copy_to_user(user, req_version,
1889				   sizeof(struct ip_set_req_version));
1890		goto done;
1891	}
1892	case IP_SET_OP_GET_BYNAME: {
1893		struct ip_set_req_get_set *req_get = data;
1894		ip_set_id_t id;
1895
1896		if (*len != sizeof(struct ip_set_req_get_set)) {
1897			ret = -EINVAL;
1898			goto done;
1899		}
1900		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
1901		nfnl_lock(NFNL_SUBSYS_IPSET);
1902		find_set_and_id(inst, req_get->set.name, &id);
1903		req_get->set.index = id;
1904		nfnl_unlock(NFNL_SUBSYS_IPSET);
1905		goto copy;
1906	}
1907	case IP_SET_OP_GET_FNAME: {
1908		struct ip_set_req_get_set_family *req_get = data;
1909		ip_set_id_t id;
1910
1911		if (*len != sizeof(struct ip_set_req_get_set_family)) {
1912			ret = -EINVAL;
1913			goto done;
1914		}
1915		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
1916		nfnl_lock(NFNL_SUBSYS_IPSET);
1917		find_set_and_id(inst, req_get->set.name, &id);
1918		req_get->set.index = id;
1919		if (id != IPSET_INVALID_ID)
1920			req_get->family = ip_set(inst, id)->family;
1921		nfnl_unlock(NFNL_SUBSYS_IPSET);
1922		goto copy;
1923	}
1924	case IP_SET_OP_GET_BYINDEX: {
1925		struct ip_set_req_get_set *req_get = data;
1926		struct ip_set *set;
1927
1928		if (*len != sizeof(struct ip_set_req_get_set) ||
1929		    req_get->set.index >= inst->ip_set_max) {
1930			ret = -EINVAL;
1931			goto done;
1932		}
1933		nfnl_lock(NFNL_SUBSYS_IPSET);
1934		set = ip_set(inst, req_get->set.index);
1935		strncpy(req_get->set.name, set ? set->name : "",
1936			IPSET_MAXNAMELEN);
1937		nfnl_unlock(NFNL_SUBSYS_IPSET);
1938		goto copy;
1939	}
1940	default:
1941		ret = -EBADMSG;
1942		goto done;
1943	}	/* end of switch(op) */
1944
1945copy:
1946	ret = copy_to_user(user, data, copylen);
1947
1948done:
1949	vfree(data);
1950	if (ret > 0)
1951		ret = 0;
1952	return ret;
1953}
1954
1955static struct nf_sockopt_ops so_set __read_mostly = {
1956	.pf		= PF_INET,
1957	.get_optmin	= SO_IP_SET,
1958	.get_optmax	= SO_IP_SET + 1,
1959	.get		= &ip_set_sockfn_get,
1960	.owner		= THIS_MODULE,
1961};
1962
1963static int __net_init
1964ip_set_net_init(struct net *net)
1965{
1966	struct ip_set_net *inst = ip_set_pernet(net);
1967	struct ip_set **list;
1968
1969	inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX;
1970	if (inst->ip_set_max >= IPSET_INVALID_ID)
1971		inst->ip_set_max = IPSET_INVALID_ID - 1;
1972
1973	list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL);
1974	if (!list)
1975		return -ENOMEM;
1976	inst->is_deleted = 0;
1977	rcu_assign_pointer(inst->ip_set_list, list);
1978	return 0;
1979}
1980
1981static void __net_exit
1982ip_set_net_exit(struct net *net)
1983{
1984	struct ip_set_net *inst = ip_set_pernet(net);
1985
1986	struct ip_set *set = NULL;
1987	ip_set_id_t i;
1988
1989	inst->is_deleted = 1; /* flag for ip_set_nfnl_put */
1990
1991	for (i = 0; i < inst->ip_set_max; i++) {
1992		set = ip_set(inst, i);
1993		if (set != NULL)
1994			ip_set_destroy_set(inst, i);
1995	}
1996	kfree(rcu_dereference_protected(inst->ip_set_list, 1));
1997}
1998
1999static struct pernet_operations ip_set_net_ops = {
2000	.init	= ip_set_net_init,
2001	.exit   = ip_set_net_exit,
2002	.id	= &ip_set_net_id,
2003	.size	= sizeof(struct ip_set_net)
2004};
2005
2006
2007static int __init
2008ip_set_init(void)
2009{
2010	int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
2011	if (ret != 0) {
2012		pr_err("ip_set: cannot register with nfnetlink.\n");
2013		return ret;
2014	}
2015	ret = nf_register_sockopt(&so_set);
2016	if (ret != 0) {
2017		pr_err("SO_SET registry failed: %d\n", ret);
2018		nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
2019		return ret;
2020	}
2021	ret = register_pernet_subsys(&ip_set_net_ops);
2022	if (ret) {
2023		pr_err("ip_set: cannot register pernet_subsys.\n");
2024		nf_unregister_sockopt(&so_set);
2025		nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
2026		return ret;
2027	}
2028	pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL);
2029	return 0;
2030}
2031
/* Module exit: undo the registrations of ip_set_init() in reverse order:
 * the per-net operations, the socket option handler and finally the
 * nfnetlink subsystem.
 */
static void __exit
ip_set_fini(void)
{
	unregister_pernet_subsys(&ip_set_net_ops);
	nf_unregister_sockopt(&so_set);
	nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
	pr_debug("these are the famous last words\n");
}
2040
/* Module entry and exit points */
module_init(ip_set_init);
module_exit(ip_set_fini);
2043