ip_set_core.c revision d31f4d448f7671dc3e6a7a1c92a4c085a36058bb
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 *                         Patrick Schaaf <bof@bof.de>
3 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10/* Kernel module for IP set management */
11
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/moduleparam.h>
15#include <linux/ip.h>
16#include <linux/skbuff.h>
17#include <linux/spinlock.h>
18#include <linux/netlink.h>
19#include <linux/rculist.h>
20#include <net/netlink.h>
21
22#include <linux/netfilter.h>
23#include <linux/netfilter/x_tables.h>
24#include <linux/netfilter/nfnetlink.h>
25#include <linux/netfilter/ipset/ip_set.h>
26
27static LIST_HEAD(ip_set_type_list);		/* all registered set types */
28static DEFINE_MUTEX(ip_set_type_mutex);		/* protects ip_set_type_list */
29static DEFINE_RWLOCK(ip_set_ref_lock);		/* protects the set refs */
30
31static struct ip_set **ip_set_list;		/* all individual sets */
32static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
33
34#define STREQ(a, b)	(strncmp(a, b, IPSET_MAXNAMELEN) == 0)
35
36static unsigned int max_sets;
37
38module_param(max_sets, int, 0600);
39MODULE_PARM_DESC(max_sets, "maximal number of sets");
40MODULE_LICENSE("GPL");
41MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
42MODULE_DESCRIPTION("core IP set support");
43MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
44
45/*
46 * The set types are implemented in modules and registered set types
47 * can be found in ip_set_type_list. Adding/deleting types is
48 * serialized by ip_set_type_mutex.
49 */
50
/* Take ip_set_type_mutex, which serializes changes of ip_set_type_list */
static inline void
ip_set_type_lock(void)
{
	mutex_lock(&ip_set_type_mutex);
}
56
/* Release ip_set_type_mutex taken by ip_set_type_lock() */
static inline void
ip_set_type_unlock(void)
{
	mutex_unlock(&ip_set_type_mutex);
}
62
63/* Register and deregister settype */
64
65static struct ip_set_type *
66find_set_type(const char *name, u8 family, u8 revision)
67{
68	struct ip_set_type *type;
69
70	list_for_each_entry_rcu(type, &ip_set_type_list, list)
71		if (STREQ(type->name, name) &&
72		    (type->family == family || type->family == NFPROTO_UNSPEC) &&
73		    revision >= type->revision_min &&
74		    revision <= type->revision_max)
75			return type;
76	return NULL;
77}
78
79/* Unlock, try to load a set type module and lock again */
80static bool
81load_settype(const char *name)
82{
83	nfnl_unlock();
84	pr_debug("try to load ip_set_%s\n", name);
85	if (request_module("ip_set_%s", name) < 0) {
86		pr_warning("Can't find ip_set type %s\n", name);
87		nfnl_lock();
88		return false;
89	}
90	nfnl_lock();
91	return true;
92}
93
94/* Find a set type and reference it */
95#define find_set_type_get(name, family, revision, found)	\
96	__find_set_type_get(name, family, revision, found, false)
97
98static int
99__find_set_type_get(const char *name, u8 family, u8 revision,
100		    struct ip_set_type **found, bool retry)
101{
102	struct ip_set_type *type;
103	int err;
104
105	if (retry && !load_settype(name))
106		return -IPSET_ERR_FIND_TYPE;
107
108	rcu_read_lock();
109	*found = find_set_type(name, family, revision);
110	if (*found) {
111		err = !try_module_get((*found)->me) ? -EFAULT : 0;
112		goto unlock;
113	}
114	/* Make sure the type is already loaded
115	 * but we don't support the revision */
116	list_for_each_entry_rcu(type, &ip_set_type_list, list)
117		if (STREQ(type->name, name)) {
118			err = -IPSET_ERR_FIND_TYPE;
119			goto unlock;
120		}
121	rcu_read_unlock();
122
123	return retry ? -IPSET_ERR_FIND_TYPE :
124		__find_set_type_get(name, family, revision, found, true);
125
126unlock:
127	rcu_read_unlock();
128	return err;
129}
130
131/* Find a given set type by name and family.
132 * If we succeeded, the supported minimal and maximum revisions are
133 * filled out.
134 */
135#define find_set_type_minmax(name, family, min, max) \
136	__find_set_type_minmax(name, family, min, max, false)
137
138static int
139__find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max,
140		       bool retry)
141{
142	struct ip_set_type *type;
143	bool found = false;
144
145	if (retry && !load_settype(name))
146		return -IPSET_ERR_FIND_TYPE;
147
148	*min = 255; *max = 0;
149	rcu_read_lock();
150	list_for_each_entry_rcu(type, &ip_set_type_list, list)
151		if (STREQ(type->name, name) &&
152		    (type->family == family || type->family == NFPROTO_UNSPEC)) {
153			found = true;
154			if (type->revision_min < *min)
155				*min = type->revision_min;
156			if (type->revision_max > *max)
157				*max = type->revision_max;
158		}
159	rcu_read_unlock();
160	if (found)
161		return 0;
162
163	return retry ? -IPSET_ERR_FIND_TYPE :
164		__find_set_type_minmax(name, family, min, max, true);
165}
166
/*
 * Printable name of a family for log messages: "inet" for NFPROTO_IPV4,
 * "inet6" for NFPROTO_IPV6 and "any" for everything else.
 * The argument may be evaluated twice, so pass side-effect free
 * expressions only.
 */
#define family_name(f)	((f) == NFPROTO_IPV4 ? "inet" : \
			 (f) == NFPROTO_IPV6 ? "inet6" : "any")
169
170/* Register a set type structure. The type is identified by
171 * the unique triple of name, family and revision.
172 */
173int
174ip_set_type_register(struct ip_set_type *type)
175{
176	int ret = 0;
177
178	if (type->protocol != IPSET_PROTOCOL) {
179		pr_warning("ip_set type %s, family %s, revision %u:%u uses "
180			   "wrong protocol version %u (want %u)\n",
181			   type->name, family_name(type->family),
182			   type->revision_min, type->revision_max,
183			   type->protocol, IPSET_PROTOCOL);
184		return -EINVAL;
185	}
186
187	ip_set_type_lock();
188	if (find_set_type(type->name, type->family, type->revision_min)) {
189		/* Duplicate! */
190		pr_warning("ip_set type %s, family %s with revision min %u "
191			   "already registered!\n", type->name,
192			   family_name(type->family), type->revision_min);
193		ret = -EINVAL;
194		goto unlock;
195	}
196	list_add_rcu(&type->list, &ip_set_type_list);
197	pr_debug("type %s, family %s, revision %u:%u registered.\n",
198		 type->name, family_name(type->family),
199		 type->revision_min, type->revision_max);
200unlock:
201	ip_set_type_unlock();
202	return ret;
203}
204EXPORT_SYMBOL_GPL(ip_set_type_register);
205
206/* Unregister a set type. There's a small race with ip_set_create */
207void
208ip_set_type_unregister(struct ip_set_type *type)
209{
210	ip_set_type_lock();
211	if (!find_set_type(type->name, type->family, type->revision_min)) {
212		pr_warning("ip_set type %s, family %s with revision min %u "
213			   "not registered\n", type->name,
214			   family_name(type->family), type->revision_min);
215		goto unlock;
216	}
217	list_del_rcu(&type->list);
218	pr_debug("type %s, family %s with revision min %u unregistered.\n",
219		 type->name, family_name(type->family), type->revision_min);
220unlock:
221	ip_set_type_unlock();
222
223	synchronize_rcu();
224}
225EXPORT_SYMBOL_GPL(ip_set_type_unregister);
226
227/* Utility functions */
228void *
229ip_set_alloc(size_t size)
230{
231	void *members = NULL;
232
233	if (size < KMALLOC_MAX_SIZE)
234		members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
235
236	if (members) {
237		pr_debug("%p: allocated with kmalloc\n", members);
238		return members;
239	}
240
241	members = vzalloc(size);
242	if (!members)
243		return NULL;
244	pr_debug("%p: allocated with vmalloc\n", members);
245
246	return members;
247}
248EXPORT_SYMBOL_GPL(ip_set_alloc);
249
250void
251ip_set_free(void *members)
252{
253	pr_debug("%p: free with %s\n", members,
254		 is_vmalloc_addr(members) ? "vfree" : "kfree");
255	if (is_vmalloc_addr(members))
256		vfree(members);
257	else
258		kfree(members);
259}
260EXPORT_SYMBOL_GPL(ip_set_free);
261
262static inline bool
263flag_nested(const struct nlattr *nla)
264{
265	return nla->nla_type & NLA_F_NESTED;
266}
267
/*
 * Policy of the attributes nested in an IP address attribute:
 * an IPv4 address is a 32 bit value, an IPv6 address is binary data
 * with the length limit of struct in6_addr.
 */
static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
	[IPSET_ATTR_IPADDR_IPV4]	= { .type = NLA_U32 },
	[IPSET_ATTR_IPADDR_IPV6]	= { .type = NLA_BINARY,
					    .len = sizeof(struct in6_addr) },
};
273
274int
275ip_set_get_ipaddr4(struct nlattr *nla,  __be32 *ipaddr)
276{
277	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
278
279	if (unlikely(!flag_nested(nla)))
280		return -IPSET_ERR_PROTOCOL;
281	if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
282		return -IPSET_ERR_PROTOCOL;
283	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
284		return -IPSET_ERR_PROTOCOL;
285
286	*ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
287	return 0;
288}
289EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
290
291int
292ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
293{
294	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
295
296	if (unlikely(!flag_nested(nla)))
297		return -IPSET_ERR_PROTOCOL;
298
299	if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
300		return -IPSET_ERR_PROTOCOL;
301	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
302		return -IPSET_ERR_PROTOCOL;
303
304	memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
305		sizeof(struct in6_addr));
306	return 0;
307}
308EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
309
310/*
311 * Creating/destroying/renaming/swapping affect the existence and
312 * the properties of a set. All of these can be executed from userspace
313 * only and serialized by the nfnl mutex indirectly from nfnetlink.
314 *
315 * Sets are identified by their index in ip_set_list and the index
316 * is used by the external references (set/SET netfilter modules).
317 *
318 * The set behind an index may change by swapping only, from userspace.
319 */
320
321static inline void
322__ip_set_get(ip_set_id_t index)
323{
324	write_lock_bh(&ip_set_ref_lock);
325	ip_set_list[index]->ref++;
326	write_unlock_bh(&ip_set_ref_lock);
327}
328
329static inline void
330__ip_set_put(ip_set_id_t index)
331{
332	write_lock_bh(&ip_set_ref_lock);
333	BUG_ON(ip_set_list[index]->ref == 0);
334	ip_set_list[index]->ref--;
335	write_unlock_bh(&ip_set_ref_lock);
336}
337
338/*
339 * Add, del and test set entries from kernel.
340 *
341 * The set behind the index must exist and must be referenced
 * so it can't be destroyed (or changed) under our feet.
343 */
344
345int
346ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
347	    const struct xt_action_param *par,
348	    const struct ip_set_adt_opt *opt)
349{
350	struct ip_set *set = ip_set_list[index];
351	int ret = 0;
352
353	BUG_ON(set == NULL);
354	pr_debug("set %s, index %u\n", set->name, index);
355
356	if (opt->dim < set->type->dimension ||
357	    !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
358		return 0;
359
360	read_lock_bh(&set->lock);
361	ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
362	read_unlock_bh(&set->lock);
363
364	if (ret == -EAGAIN) {
365		/* Type requests element to be completed */
366		pr_debug("element must be competed, ADD is triggered\n");
367		write_lock_bh(&set->lock);
368		set->variant->kadt(set, skb, par, IPSET_ADD, opt);
369		write_unlock_bh(&set->lock);
370		ret = 1;
371	}
372
373	/* Convert error codes to nomatch */
374	return (ret < 0 ? 0 : ret);
375}
376EXPORT_SYMBOL_GPL(ip_set_test);
377
378int
379ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
380	   const struct xt_action_param *par,
381	   const struct ip_set_adt_opt *opt)
382{
383	struct ip_set *set = ip_set_list[index];
384	int ret;
385
386	BUG_ON(set == NULL);
387	pr_debug("set %s, index %u\n", set->name, index);
388
389	if (opt->dim < set->type->dimension ||
390	    !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
391		return 0;
392
393	write_lock_bh(&set->lock);
394	ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
395	write_unlock_bh(&set->lock);
396
397	return ret;
398}
399EXPORT_SYMBOL_GPL(ip_set_add);
400
401int
402ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
403	   const struct xt_action_param *par,
404	   const struct ip_set_adt_opt *opt)
405{
406	struct ip_set *set = ip_set_list[index];
407	int ret = 0;
408
409	BUG_ON(set == NULL);
410	pr_debug("set %s, index %u\n", set->name, index);
411
412	if (opt->dim < set->type->dimension ||
413	    !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
414		return 0;
415
416	write_lock_bh(&set->lock);
417	ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
418	write_unlock_bh(&set->lock);
419
420	return ret;
421}
422EXPORT_SYMBOL_GPL(ip_set_del);
423
424/*
425 * Find set by name, reference it once. The reference makes sure the
426 * thing pointed to, does not go away under our feet.
427 *
428 */
429ip_set_id_t
430ip_set_get_byname(const char *name, struct ip_set **set)
431{
432	ip_set_id_t i, index = IPSET_INVALID_ID;
433	struct ip_set *s;
434
435	for (i = 0; i < ip_set_max; i++) {
436		s = ip_set_list[i];
437		if (s != NULL && STREQ(s->name, name)) {
438			__ip_set_get(i);
439			index = i;
440			*set = s;
441		}
442	}
443
444	return index;
445}
446EXPORT_SYMBOL_GPL(ip_set_get_byname);
447
448/*
449 * If the given set pointer points to a valid set, decrement
450 * reference count by 1. The caller shall not assume the index
451 * to be valid, after calling this function.
452 *
453 */
454void
455ip_set_put_byindex(ip_set_id_t index)
456{
457	if (ip_set_list[index] != NULL)
458		__ip_set_put(index);
459}
460EXPORT_SYMBOL_GPL(ip_set_put_byindex);
461
462/*
463 * Get the name of a set behind a set index.
464 * We assume the set is referenced, so it does exist and
465 * can't be destroyed. The set cannot be renamed due to
466 * the referencing either.
467 *
468 */
469const char *
470ip_set_name_byindex(ip_set_id_t index)
471{
472	const struct ip_set *set = ip_set_list[index];
473
474	BUG_ON(set == NULL);
475	BUG_ON(set->ref == 0);
476
477	/* Referenced, so it's safe */
478	return set->name;
479}
480EXPORT_SYMBOL_GPL(ip_set_name_byindex);
481
482/*
483 * Routines to call by external subsystems, which do not
484 * call nfnl_lock for us.
485 */
486
487/*
488 * Find set by name, reference it once. The reference makes sure the
489 * thing pointed to, does not go away under our feet.
490 *
491 * The nfnl mutex is used in the function.
492 */
493ip_set_id_t
494ip_set_nfnl_get(const char *name)
495{
496	struct ip_set *s;
497	ip_set_id_t index;
498
499	nfnl_lock();
500	index = ip_set_get_byname(name, &s);
501	nfnl_unlock();
502
503	return index;
504}
505EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
506
507/*
508 * Find set by index, reference it once. The reference makes sure the
509 * thing pointed to, does not go away under our feet.
510 *
511 * The nfnl mutex is used in the function.
512 */
513ip_set_id_t
514ip_set_nfnl_get_byindex(ip_set_id_t index)
515{
516	if (index > ip_set_max)
517		return IPSET_INVALID_ID;
518
519	nfnl_lock();
520	if (ip_set_list[index])
521		__ip_set_get(index);
522	else
523		index = IPSET_INVALID_ID;
524	nfnl_unlock();
525
526	return index;
527}
528EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
529
/*
 * Drop one reference of the set stored at the given index, if there
 * is a set at that index. The caller shall not assume the index
 * to be valid, after calling this function.
 *
 * The nfnl mutex is taken around ip_set_put_byindex() in the function.
 */
void
ip_set_nfnl_put(ip_set_id_t index)
{
	nfnl_lock();
	ip_set_put_byindex(index);
	nfnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
545
546/*
547 * Communication protocol with userspace over netlink.
548 *
549 * The commands are serialized by the nfnl mutex.
550 */
551
552static inline bool
553protocol_failed(const struct nlattr * const tb[])
554{
555	return !tb[IPSET_ATTR_PROTOCOL] ||
556	       nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
557}
558
559static inline u32
560flag_exist(const struct nlmsghdr *nlh)
561{
562	return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
563}
564
565static struct nlmsghdr *
566start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags,
567	  enum ipset_cmd cmd)
568{
569	struct nlmsghdr *nlh;
570	struct nfgenmsg *nfmsg;
571
572	nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
573			sizeof(*nfmsg), flags);
574	if (nlh == NULL)
575		return NULL;
576
577	nfmsg = nlmsg_data(nlh);
578	nfmsg->nfgen_family = NFPROTO_IPV4;
579	nfmsg->version = NFNETLINK_V0;
580	nfmsg->res_id = 0;
581
582	return nlh;
583}
584
585/* Create a set */
586
/*
 * Attribute policy of the create command: set and type names are
 * NUL terminated strings of at most IPSET_MAXNAMELEN - 1 characters,
 * protocol, revision and family are 8 bit values and the type
 * specific data is a nested attribute.
 */
static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
	[IPSET_ATTR_TYPENAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1},
	[IPSET_ATTR_REVISION]	= { .type = NLA_U8 },
	[IPSET_ATTR_FAMILY]	= { .type = NLA_U8 },
	[IPSET_ATTR_DATA]	= { .type = NLA_NESTED },
};
597
598static ip_set_id_t
599find_set_id(const char *name)
600{
601	ip_set_id_t i, index = IPSET_INVALID_ID;
602	const struct ip_set *set;
603
604	for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) {
605		set = ip_set_list[i];
606		if (set != NULL && STREQ(set->name, name))
607			index = i;
608	}
609	return index;
610}
611
612static inline struct ip_set *
613find_set(const char *name)
614{
615	ip_set_id_t index = find_set_id(name);
616
617	return index == IPSET_INVALID_ID ? NULL : ip_set_list[index];
618}
619
620static int
621find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
622{
623	ip_set_id_t i;
624
625	*index = IPSET_INVALID_ID;
626	for (i = 0;  i < ip_set_max; i++) {
627		if (ip_set_list[i] == NULL) {
628			if (*index == IPSET_INVALID_ID)
629				*index = i;
630		} else if (STREQ(name, ip_set_list[i]->name)) {
631			/* Name clash */
632			*set = ip_set_list[i];
633			return -EEXIST;
634		}
635	}
636	if (*index == IPSET_INVALID_ID)
637		/* No free slot remained */
638		return -IPSET_ERR_MAX_SETS;
639	return 0;
640}
641
/*
 * Command handler which accepts nothing: all arguments are ignored
 * and -EOPNOTSUPP is returned unconditionally.
 */
static int
ip_set_none(struct sock *ctnl, struct sk_buff *skb,
	    const struct nlmsghdr *nlh,
	    const struct nlattr * const attr[])
{
	return -EOPNOTSUPP;
}
649
/*
 * Handler of the create command.
 *
 * The set name, type name, revision and family attributes are
 * mandatory; the data attribute is optional but must be flagged as
 * nested if present, otherwise -IPSET_ERR_PROTOCOL is returned.
 *
 * A new struct ip_set is allocated, the set type is looked up and
 * referenced, the type specific part is created and finally the set
 * is stored at the first free index of ip_set_list.
 *
 * Returns 0 on success. 0 is also returned, with the freshly built set
 * thrown away, if a set with the same name exists, NLM_F_EXCL was not
 * specified and the existing set is of the same type and passes the
 * same_set() check of the variant. Otherwise a negative error code
 * is returned.
 */
static int
ip_set_create(struct sock *ctnl, struct sk_buff *skb,
	      const struct nlmsghdr *nlh,
	      const struct nlattr * const attr[])
{
	struct ip_set *set, *clash = NULL;
	ip_set_id_t index = IPSET_INVALID_ID;
	/* Zeroed, because it is passed to create() even without data attr */
	struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
	const char *name, *typename;
	u8 family, revision;
	u32 flags = flag_exist(nlh);
	int ret = 0;

	if (unlikely(protocol_failed(attr) ||
		     attr[IPSET_ATTR_SETNAME] == NULL ||
		     attr[IPSET_ATTR_TYPENAME] == NULL ||
		     attr[IPSET_ATTR_REVISION] == NULL ||
		     attr[IPSET_ATTR_FAMILY] == NULL ||
		     (attr[IPSET_ATTR_DATA] != NULL &&
		      !flag_nested(attr[IPSET_ATTR_DATA]))))
		return -IPSET_ERR_PROTOCOL;

	name = nla_data(attr[IPSET_ATTR_SETNAME]);
	typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
	family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
	revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
	pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
		 name, typename, family_name(family), revision);

	/*
	 * First, and without any locks, allocate and initialize
	 * a normal base set structure.
	 */
	set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
	if (!set)
		return -ENOMEM;
	rwlock_init(&set->lock);
	strlcpy(set->name, name, IPSET_MAXNAMELEN);
	set->family = family;
	set->revision = revision;

	/*
	 * Next, check that we know the type, and take
	 * a reference on the type, to make sure it stays available
	 * while constructing our new set.
	 *
	 * After referencing the type, we try to create the type
	 * specific part of the set without holding any locks.
	 */
	ret = find_set_type_get(typename, family, revision, &(set->type));
	if (ret)
		goto out;

	/*
	 * Without holding any locks, create private part.
	 */
	if (attr[IPSET_ATTR_DATA] &&
	    nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
			     set->type->create_policy)) {
		ret = -IPSET_ERR_PROTOCOL;
		goto put_out;
	}

	ret = set->type->create(set, tb, flags);
	if (ret != 0)
		goto put_out;

	/* BTW, ret==0 here. */

	/*
	 * Here, we have a valid, constructed set and we are protected
	 * by the nfnl mutex. Find the first free index in ip_set_list
	 * and check clashing.
	 */
	if ((ret = find_free_id(set->name, &index, &clash)) != 0) {
		/* If this is the same set and requested, ignore error */
		if (ret == -EEXIST &&
		    (flags & IPSET_FLAG_EXIST) &&
		    STREQ(set->type->name, clash->type->name) &&
		    set->type->family == clash->type->family &&
		    set->type->revision_min == clash->type->revision_min &&
		    set->type->revision_max == clash->type->revision_max &&
		    set->variant->same_set(set, clash))
			ret = 0;
		/* Either way the new set is not needed: tear it down */
		goto cleanup;
	}

	/*
	 * Finally! Add our shiny new set to the list, and be done.
	 */
	pr_debug("create: '%s' created with index %u!\n", set->name, index);
	ip_set_list[index] = set;

	return ret;

	/* Unwind in the reverse order of the construction */
cleanup:
	set->variant->destroy(set);
put_out:
	module_put(set->type->me);
out:
	kfree(set);
	return ret;
}
753
754/* Destroy sets */
755
/*
 * Attribute policy of the commands taking at most a set name:
 * the protocol is an 8 bit value, the set name is a NUL terminated
 * string of at most IPSET_MAXNAMELEN - 1 characters.
 */
static const struct nla_policy
ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
};
762
763static void
764ip_set_destroy_set(ip_set_id_t index)
765{
766	struct ip_set *set = ip_set_list[index];
767
768	pr_debug("set: %s\n",  set->name);
769	ip_set_list[index] = NULL;
770
771	/* Must call it without holding any lock */
772	set->variant->destroy(set);
773	module_put(set->type->me);
774	kfree(set);
775}
776
777static int
778ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
779	       const struct nlmsghdr *nlh,
780	       const struct nlattr * const attr[])
781{
782	ip_set_id_t i;
783	int ret = 0;
784
785	if (unlikely(protocol_failed(attr)))
786		return -IPSET_ERR_PROTOCOL;
787
788	/* Commands are serialized and references are
789	 * protected by the ip_set_ref_lock.
790	 * External systems (i.e. xt_set) must call
791	 * ip_set_put|get_nfnl_* functions, that way we
792	 * can safely check references here.
793	 *
794	 * list:set timer can only decrement the reference
795	 * counter, so if it's already zero, we can proceed
796	 * without holding the lock.
797	 */
798	read_lock_bh(&ip_set_ref_lock);
799	if (!attr[IPSET_ATTR_SETNAME]) {
800		for (i = 0; i < ip_set_max; i++) {
801			if (ip_set_list[i] != NULL && ip_set_list[i]->ref) {
802				ret = -IPSET_ERR_BUSY;
803				goto out;
804			}
805		}
806		read_unlock_bh(&ip_set_ref_lock);
807		for (i = 0; i < ip_set_max; i++) {
808			if (ip_set_list[i] != NULL)
809				ip_set_destroy_set(i);
810		}
811	} else {
812		i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
813		if (i == IPSET_INVALID_ID) {
814			ret = -ENOENT;
815			goto out;
816		} else if (ip_set_list[i]->ref) {
817			ret = -IPSET_ERR_BUSY;
818			goto out;
819		}
820		read_unlock_bh(&ip_set_ref_lock);
821
822		ip_set_destroy_set(i);
823	}
824	return 0;
825out:
826	read_unlock_bh(&ip_set_ref_lock);
827	return ret;
828}
829
830/* Flush sets */
831
/* Call the flush method of the set variant under the write lock of the set */
static void
ip_set_flush_set(struct ip_set *set)
{
	pr_debug("set: %s\n",  set->name);

	write_lock_bh(&set->lock);
	set->variant->flush(set);
	write_unlock_bh(&set->lock);
}
841
842static int
843ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
844	     const struct nlmsghdr *nlh,
845	     const struct nlattr * const attr[])
846{
847	ip_set_id_t i;
848
849	if (unlikely(protocol_failed(attr)))
850		return -IPSET_ERR_PROTOCOL;
851
852	if (!attr[IPSET_ATTR_SETNAME]) {
853		for (i = 0; i < ip_set_max; i++)
854			if (ip_set_list[i] != NULL)
855				ip_set_flush_set(ip_set_list[i]);
856	} else {
857		i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
858		if (i == IPSET_INVALID_ID)
859			return -ENOENT;
860
861		ip_set_flush_set(ip_set_list[i]);
862	}
863
864	return 0;
865}
866
867/* Rename a set */
868
/*
 * Attribute policy of the commands working on two set names:
 * both names are NUL terminated strings of at most
 * IPSET_MAXNAMELEN - 1 characters, the protocol is an 8 bit value.
 */
static const struct nla_policy
ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
	[IPSET_ATTR_SETNAME2]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
};
877
878static int
879ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
880	      const struct nlmsghdr *nlh,
881	      const struct nlattr * const attr[])
882{
883	struct ip_set *set;
884	const char *name2;
885	ip_set_id_t i;
886	int ret = 0;
887
888	if (unlikely(protocol_failed(attr) ||
889		     attr[IPSET_ATTR_SETNAME] == NULL ||
890		     attr[IPSET_ATTR_SETNAME2] == NULL))
891		return -IPSET_ERR_PROTOCOL;
892
893	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
894	if (set == NULL)
895		return -ENOENT;
896
897	read_lock_bh(&ip_set_ref_lock);
898	if (set->ref != 0) {
899		ret = -IPSET_ERR_REFERENCED;
900		goto out;
901	}
902
903	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
904	for (i = 0; i < ip_set_max; i++) {
905		if (ip_set_list[i] != NULL &&
906		    STREQ(ip_set_list[i]->name, name2)) {
907			ret = -IPSET_ERR_EXIST_SETNAME2;
908			goto out;
909		}
910	}
911	strncpy(set->name, name2, IPSET_MAXNAMELEN);
912
913out:
914	read_unlock_bh(&ip_set_ref_lock);
915	return ret;
916}
917
918/* Swap two sets so that name/index points to the other.
919 * References and set names are also swapped.
920 *
921 * The commands are serialized by the nfnl mutex and references are
922 * protected by the ip_set_ref_lock. The kernel interfaces
923 * do not hold the mutex but the pointer settings are atomic
924 * so the ip_set_list always contains valid pointers to the sets.
925 */
926
927static int
928ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
929	    const struct nlmsghdr *nlh,
930	    const struct nlattr * const attr[])
931{
932	struct ip_set *from, *to;
933	ip_set_id_t from_id, to_id;
934	char from_name[IPSET_MAXNAMELEN];
935
936	if (unlikely(protocol_failed(attr) ||
937		     attr[IPSET_ATTR_SETNAME] == NULL ||
938		     attr[IPSET_ATTR_SETNAME2] == NULL))
939		return -IPSET_ERR_PROTOCOL;
940
941	from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
942	if (from_id == IPSET_INVALID_ID)
943		return -ENOENT;
944
945	to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2]));
946	if (to_id == IPSET_INVALID_ID)
947		return -IPSET_ERR_EXIST_SETNAME2;
948
949	from = ip_set_list[from_id];
950	to = ip_set_list[to_id];
951
952	/* Features must not change.
953	 * Not an artificial restriction anymore, as we must prevent
954	 * possible loops created by swapping in setlist type of sets. */
955	if (!(from->type->features == to->type->features &&
956	      from->type->family == to->type->family))
957		return -IPSET_ERR_TYPE_MISMATCH;
958
959	strncpy(from_name, from->name, IPSET_MAXNAMELEN);
960	strncpy(from->name, to->name, IPSET_MAXNAMELEN);
961	strncpy(to->name, from_name, IPSET_MAXNAMELEN);
962
963	write_lock_bh(&ip_set_ref_lock);
964	swap(from->ref, to->ref);
965	ip_set_list[from_id] = to;
966	ip_set_list[to_id] = from;
967	write_unlock_bh(&ip_set_ref_lock);
968
969	return 0;
970}
971
972/* List/save set data */
973
/* Dump states kept in the lower 16 bits of cb->args[0] */
#define DUMP_INIT	0
#define DUMP_ALL	1
#define DUMP_ONE	2
#define DUMP_LAST	3

/* cb->args[0] packs the dump state (lower 16 bits) and
 * the dump flags (bits above) into a single value */
#define DUMP_TYPE(arg)		(((u32)(arg)) & 0x0000FFFF)
#define DUMP_FLAGS(arg)		(((u32)(arg)) >> 16)
981
982static int
983ip_set_dump_done(struct netlink_callback *cb)
984{
985	if (cb->args[2]) {
986		pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name);
987		ip_set_put_byindex((ip_set_id_t) cb->args[1]);
988	}
989	return 0;
990}
991
992static inline void
993dump_attrs(struct nlmsghdr *nlh)
994{
995	const struct nlattr *attr;
996	int rem;
997
998	pr_debug("dump nlmsg\n");
999	nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
1000		pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
1001	}
1002}
1003
1004static int
1005dump_init(struct netlink_callback *cb)
1006{
1007	struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
1008	int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
1009	struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1010	struct nlattr *attr = (void *)nlh + min_len;
1011	u32 dump_type;
1012	ip_set_id_t index;
1013
1014	/* Second pass, so parser can't fail */
1015	nla_parse(cda, IPSET_ATTR_CMD_MAX,
1016		  attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
1017
1018	/* cb->args[0] : dump single set/all sets
1019	 *         [1] : set index
1020	 *         [..]: type specific
1021	 */
1022
1023	if (cda[IPSET_ATTR_SETNAME]) {
1024		index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME]));
1025		if (index == IPSET_INVALID_ID)
1026			return -ENOENT;
1027
1028		dump_type = DUMP_ONE;
1029		cb->args[1] = index;
1030	} else
1031		dump_type = DUMP_ALL;
1032
1033	if (cda[IPSET_ATTR_FLAGS]) {
1034		u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
1035		dump_type |= (f << 16);
1036	}
1037	cb->args[0] = dump_type;
1038
1039	return 0;
1040}
1041
/*
 * Dump callback: fill skb with the listing of one set or of all sets.
 *
 * The state is kept in cb->args between the calls:
 *   args[0]: dump type and flags, zero until dump_init() has run
 *   args[1]: index of the set to continue with
 *   args[2]: non-zero while the listing of a set is unfinished; the
 *            set is referenced during that time
 *            (NOTE(review): presumably maintained by the list method
 *            of the set variant - confirm against the set types)
 *
 * When all sets are dumped, two passes are made over ip_set_list:
 * DUMP_ALL skips the sets whose type has the IPSET_DUMP_LAST feature
 * and the DUMP_LAST pass dumps exactly those.
 *
 * Returns a negative error code, or skb->len otherwise.
 */
static int
ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
{
	ip_set_id_t index = IPSET_INVALID_ID, max;
	struct ip_set *set = NULL;
	struct nlmsghdr *nlh = NULL;
	unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0;
	u32 dump_type, dump_flags;
	int ret = 0;

	/* First call: the dump state is not initialized yet */
	if (!cb->args[0]) {
		ret = dump_init(cb);
		if (ret < 0) {
			nlh = nlmsg_hdr(cb->skb);
			/* We have to create and send the error message
			 * manually :-( */
			if (nlh->nlmsg_flags & NLM_F_ACK)
				netlink_ack(cb->skb, nlh, ret);
			return ret;
		}
	}

	/* Nothing left to dump */
	if (cb->args[1] >= ip_set_max)
		goto out;

	dump_type = DUMP_TYPE(cb->args[0]);
	dump_flags = DUMP_FLAGS(cb->args[0]);
	/* A single set is dumped by limiting the loop to its index */
	max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
dump_last:
	pr_debug("args[0]: %u %u args[1]: %ld\n",
		 dump_type, dump_flags, cb->args[1]);
	for (; cb->args[1] < max; cb->args[1]++) {
		index = (ip_set_id_t) cb->args[1];
		set = ip_set_list[index];
		if (set == NULL) {
			if (dump_type == DUMP_ONE) {
				ret = -ENOENT;
				goto out;
			}
			continue;
		}
		/* When dumping all sets, we must dump "sorted"
		 * so that lists (unions of sets) are dumped last.
		 */
		if (dump_type != DUMP_ONE &&
		    ((dump_type == DUMP_ALL) ==
		     !!(set->type->features & IPSET_DUMP_LAST)))
			continue;
		pr_debug("List set: %s\n", set->name);
		if (!cb->args[2]) {
			/* Start listing: make sure set won't be destroyed */
			pr_debug("reference set\n");
			__ip_set_get(index);
		}
		nlh = start_msg(skb, NETLINK_CB(cb->skb).pid,
				cb->nlh->nlmsg_seq, flags,
				IPSET_CMD_LIST);
		if (!nlh) {
			ret = -EMSGSIZE;
			goto release_refcount;
		}
		if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
		    nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
			goto nla_put_failure;
		/* Only the set names were requested */
		if (dump_flags & IPSET_FLAG_LIST_SETNAME)
			goto next_set;
		switch (cb->args[2]) {
		case 0:
			/* Core header data */
			if (nla_put_string(skb, IPSET_ATTR_TYPENAME,
					   set->type->name) ||
			    nla_put_u8(skb, IPSET_ATTR_FAMILY,
				       set->family) ||
			    nla_put_u8(skb, IPSET_ATTR_REVISION,
				       set->revision))
				goto nla_put_failure;
			ret = set->variant->head(set, skb);
			if (ret < 0)
				goto release_refcount;
			/* Only the headers were requested */
			if (dump_flags & IPSET_FLAG_LIST_HEADER)
				goto next_set;
			/* Fall through and add elements */
		default:
			read_lock_bh(&set->lock);
			ret = set->variant->list(set, skb, cb);
			read_unlock_bh(&set->lock);
			if (!cb->args[2])
				/* Set is done, proceed with next one */
				goto next_set;
			/* Listing is unfinished: the reference is kept
			 * unless ret signals an error */
			goto release_refcount;
		}
	}
	/* If we dump all sets, continue with dumping last ones */
	if (dump_type == DUMP_ALL) {
		dump_type = DUMP_LAST;
		cb->args[0] = dump_type | (dump_flags << 16);
		cb->args[1] = 0;
		goto dump_last;
	}
	goto out;

nla_put_failure:
	ret = -EFAULT;
	/* Falls through: the index is advanced and the set is released */
next_set:
	if (dump_type == DUMP_ONE)
		/* Invalid index: the next call has nothing left to dump */
		cb->args[1] = IPSET_INVALID_ID;
	else
		cb->args[1]++;
release_refcount:
	/* If there was an error or set is done, release set */
	if (ret || !cb->args[2]) {
		pr_debug("release set %s\n", ip_set_list[index]->name);
		ip_set_put_byindex(index);
		cb->args[2] = 0;
	}
out:
	/* Finalize the message, if one was started */
	if (nlh) {
		nlmsg_end(skb, nlh);
		pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
		dump_attrs(nlh);
	}

	return ret < 0 ? ret : skb->len;
}
1166
1167static int
1168ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
1169	    const struct nlmsghdr *nlh,
1170	    const struct nlattr * const attr[])
1171{
1172	if (unlikely(protocol_failed(attr)))
1173		return -IPSET_ERR_PROTOCOL;
1174
1175	{
1176		struct netlink_dump_control c = {
1177			.dump = ip_set_dump_start,
1178			.done = ip_set_dump_done,
1179		};
1180		return netlink_dump_start(ctnl, skb, nlh, &c);
1181	}
1182}
1183
1184/* Add, del and test */
1185
1186static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
1187	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1188	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
1189				    .len = IPSET_MAXNAMELEN - 1 },
1190	[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
1191	[IPSET_ATTR_DATA]	= { .type = NLA_NESTED },
1192	[IPSET_ATTR_ADT]	= { .type = NLA_NESTED },
1193};
1194
1195static int
1196call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
1197	struct nlattr *tb[], enum ipset_adt adt,
1198	u32 flags, bool use_lineno)
1199{
1200	int ret;
1201	u32 lineno = 0;
1202	bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
1203
1204	do {
1205		write_lock_bh(&set->lock);
1206		ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
1207		write_unlock_bh(&set->lock);
1208		retried = true;
1209	} while (ret == -EAGAIN &&
1210		 set->variant->resize &&
1211		 (ret = set->variant->resize(set, retried)) == 0);
1212
1213	if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
1214		return 0;
1215	if (lineno && use_lineno) {
1216		/* Error in restore/batch mode: send back lineno */
1217		struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
1218		struct sk_buff *skb2;
1219		struct nlmsgerr *errmsg;
1220		size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
1221		int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
1222		struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1223		struct nlattr *cmdattr;
1224		u32 *errline;
1225
1226		skb2 = nlmsg_new(payload, GFP_KERNEL);
1227		if (skb2 == NULL)
1228			return -ENOMEM;
1229		rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid,
1230				  nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
1231		errmsg = nlmsg_data(rep);
1232		errmsg->error = ret;
1233		memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
1234		cmdattr = (void *)&errmsg->msg + min_len;
1235
1236		nla_parse(cda, IPSET_ATTR_CMD_MAX,
1237			  cmdattr, nlh->nlmsg_len - min_len,
1238			  ip_set_adt_policy);
1239
1240		errline = nla_data(cda[IPSET_ATTR_LINENO]);
1241
1242		*errline = lineno;
1243
1244		netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1245		/* Signal netlink not to send its ACK/errmsg.  */
1246		return -EINTR;
1247	}
1248
1249	return ret;
1250}
1251
1252static int
1253ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
1254	    const struct nlmsghdr *nlh,
1255	    const struct nlattr * const attr[])
1256{
1257	struct ip_set *set;
1258	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1259	const struct nlattr *nla;
1260	u32 flags = flag_exist(nlh);
1261	bool use_lineno;
1262	int ret = 0;
1263
1264	if (unlikely(protocol_failed(attr) ||
1265		     attr[IPSET_ATTR_SETNAME] == NULL ||
1266		     !((attr[IPSET_ATTR_DATA] != NULL) ^
1267		       (attr[IPSET_ATTR_ADT] != NULL)) ||
1268		     (attr[IPSET_ATTR_DATA] != NULL &&
1269		      !flag_nested(attr[IPSET_ATTR_DATA])) ||
1270		     (attr[IPSET_ATTR_ADT] != NULL &&
1271		      (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1272		       attr[IPSET_ATTR_LINENO] == NULL))))
1273		return -IPSET_ERR_PROTOCOL;
1274
1275	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1276	if (set == NULL)
1277		return -ENOENT;
1278
1279	use_lineno = !!attr[IPSET_ATTR_LINENO];
1280	if (attr[IPSET_ATTR_DATA]) {
1281		if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1282				     attr[IPSET_ATTR_DATA],
1283				     set->type->adt_policy))
1284			return -IPSET_ERR_PROTOCOL;
1285		ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
1286			      use_lineno);
1287	} else {
1288		int nla_rem;
1289
1290		nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1291			memset(tb, 0, sizeof(tb));
1292			if (nla_type(nla) != IPSET_ATTR_DATA ||
1293			    !flag_nested(nla) ||
1294			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1295					     set->type->adt_policy))
1296				return -IPSET_ERR_PROTOCOL;
1297			ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
1298				      flags, use_lineno);
1299			if (ret < 0)
1300				return ret;
1301		}
1302	}
1303	return ret;
1304}
1305
1306static int
1307ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
1308	    const struct nlmsghdr *nlh,
1309	    const struct nlattr * const attr[])
1310{
1311	struct ip_set *set;
1312	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1313	const struct nlattr *nla;
1314	u32 flags = flag_exist(nlh);
1315	bool use_lineno;
1316	int ret = 0;
1317
1318	if (unlikely(protocol_failed(attr) ||
1319		     attr[IPSET_ATTR_SETNAME] == NULL ||
1320		     !((attr[IPSET_ATTR_DATA] != NULL) ^
1321		       (attr[IPSET_ATTR_ADT] != NULL)) ||
1322		     (attr[IPSET_ATTR_DATA] != NULL &&
1323		      !flag_nested(attr[IPSET_ATTR_DATA])) ||
1324		     (attr[IPSET_ATTR_ADT] != NULL &&
1325		      (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1326		       attr[IPSET_ATTR_LINENO] == NULL))))
1327		return -IPSET_ERR_PROTOCOL;
1328
1329	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1330	if (set == NULL)
1331		return -ENOENT;
1332
1333	use_lineno = !!attr[IPSET_ATTR_LINENO];
1334	if (attr[IPSET_ATTR_DATA]) {
1335		if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1336				     attr[IPSET_ATTR_DATA],
1337				     set->type->adt_policy))
1338			return -IPSET_ERR_PROTOCOL;
1339		ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
1340			      use_lineno);
1341	} else {
1342		int nla_rem;
1343
1344		nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1345			memset(tb, 0, sizeof(*tb));
1346			if (nla_type(nla) != IPSET_ATTR_DATA ||
1347			    !flag_nested(nla) ||
1348			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1349					     set->type->adt_policy))
1350				return -IPSET_ERR_PROTOCOL;
1351			ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
1352				      flags, use_lineno);
1353			if (ret < 0)
1354				return ret;
1355		}
1356	}
1357	return ret;
1358}
1359
1360static int
1361ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
1362	     const struct nlmsghdr *nlh,
1363	     const struct nlattr * const attr[])
1364{
1365	struct ip_set *set;
1366	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1367	int ret = 0;
1368
1369	if (unlikely(protocol_failed(attr) ||
1370		     attr[IPSET_ATTR_SETNAME] == NULL ||
1371		     attr[IPSET_ATTR_DATA] == NULL ||
1372		     !flag_nested(attr[IPSET_ATTR_DATA])))
1373		return -IPSET_ERR_PROTOCOL;
1374
1375	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1376	if (set == NULL)
1377		return -ENOENT;
1378
1379	if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
1380			     set->type->adt_policy))
1381		return -IPSET_ERR_PROTOCOL;
1382
1383	read_lock_bh(&set->lock);
1384	ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
1385	read_unlock_bh(&set->lock);
1386	/* Userspace can't trigger element to be re-added */
1387	if (ret == -EAGAIN)
1388		ret = 1;
1389
1390	return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST;
1391}
1392
/* Get header data of a set */
1394
1395static int
1396ip_set_header(struct sock *ctnl, struct sk_buff *skb,
1397	      const struct nlmsghdr *nlh,
1398	      const struct nlattr * const attr[])
1399{
1400	const struct ip_set *set;
1401	struct sk_buff *skb2;
1402	struct nlmsghdr *nlh2;
1403	ip_set_id_t index;
1404	int ret = 0;
1405
1406	if (unlikely(protocol_failed(attr) ||
1407		     attr[IPSET_ATTR_SETNAME] == NULL))
1408		return -IPSET_ERR_PROTOCOL;
1409
1410	index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
1411	if (index == IPSET_INVALID_ID)
1412		return -ENOENT;
1413	set = ip_set_list[index];
1414
1415	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1416	if (skb2 == NULL)
1417		return -ENOMEM;
1418
1419	nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1420			 IPSET_CMD_HEADER);
1421	if (!nlh2)
1422		goto nlmsg_failure;
1423	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1424	    nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) ||
1425	    nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) ||
1426	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
1427	    nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision))
1428		goto nla_put_failure;
1429	nlmsg_end(skb2, nlh2);
1430
1431	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1432	if (ret < 0)
1433		return ret;
1434
1435	return 0;
1436
1437nla_put_failure:
1438	nlmsg_cancel(skb2, nlh2);
1439nlmsg_failure:
1440	kfree_skb(skb2);
1441	return -EMSGSIZE;
1442}
1443
1444/* Get type data */
1445
1446static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
1447	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1448	[IPSET_ATTR_TYPENAME]	= { .type = NLA_NUL_STRING,
1449				    .len = IPSET_MAXNAMELEN - 1 },
1450	[IPSET_ATTR_FAMILY]	= { .type = NLA_U8 },
1451};
1452
1453static int
1454ip_set_type(struct sock *ctnl, struct sk_buff *skb,
1455	    const struct nlmsghdr *nlh,
1456	    const struct nlattr * const attr[])
1457{
1458	struct sk_buff *skb2;
1459	struct nlmsghdr *nlh2;
1460	u8 family, min, max;
1461	const char *typename;
1462	int ret = 0;
1463
1464	if (unlikely(protocol_failed(attr) ||
1465		     attr[IPSET_ATTR_TYPENAME] == NULL ||
1466		     attr[IPSET_ATTR_FAMILY] == NULL))
1467		return -IPSET_ERR_PROTOCOL;
1468
1469	family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
1470	typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
1471	ret = find_set_type_minmax(typename, family, &min, &max);
1472	if (ret)
1473		return ret;
1474
1475	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1476	if (skb2 == NULL)
1477		return -ENOMEM;
1478
1479	nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1480			 IPSET_CMD_TYPE);
1481	if (!nlh2)
1482		goto nlmsg_failure;
1483	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1484	    nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) ||
1485	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) ||
1486	    nla_put_u8(skb2, IPSET_ATTR_REVISION, max) ||
1487	    nla_put_u8(skb2, IPSET_ATTR_REVISION_MIN, min))
1488		goto nla_put_failure;
1489	nlmsg_end(skb2, nlh2);
1490
1491	pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
1492	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1493	if (ret < 0)
1494		return ret;
1495
1496	return 0;
1497
1498nla_put_failure:
1499	nlmsg_cancel(skb2, nlh2);
1500nlmsg_failure:
1501	kfree_skb(skb2);
1502	return -EMSGSIZE;
1503}
1504
1505/* Get protocol version */
1506
1507static const struct nla_policy
1508ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
1509	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1510};
1511
1512static int
1513ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
1514		const struct nlmsghdr *nlh,
1515		const struct nlattr * const attr[])
1516{
1517	struct sk_buff *skb2;
1518	struct nlmsghdr *nlh2;
1519	int ret = 0;
1520
1521	if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
1522		return -IPSET_ERR_PROTOCOL;
1523
1524	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1525	if (skb2 == NULL)
1526		return -ENOMEM;
1527
1528	nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1529			 IPSET_CMD_PROTOCOL);
1530	if (!nlh2)
1531		goto nlmsg_failure;
1532	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL))
1533		goto nla_put_failure;
1534	nlmsg_end(skb2, nlh2);
1535
1536	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1537	if (ret < 0)
1538		return ret;
1539
1540	return 0;
1541
1542nla_put_failure:
1543	nlmsg_cancel(skb2, nlh2);
1544nlmsg_failure:
1545	kfree_skb(skb2);
1546	return -EMSGSIZE;
1547}
1548
1549static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
1550	[IPSET_CMD_NONE]	= {
1551		.call		= ip_set_none,
1552		.attr_count	= IPSET_ATTR_CMD_MAX,
1553	},
1554	[IPSET_CMD_CREATE]	= {
1555		.call		= ip_set_create,
1556		.attr_count	= IPSET_ATTR_CMD_MAX,
1557		.policy		= ip_set_create_policy,
1558	},
1559	[IPSET_CMD_DESTROY]	= {
1560		.call		= ip_set_destroy,
1561		.attr_count	= IPSET_ATTR_CMD_MAX,
1562		.policy		= ip_set_setname_policy,
1563	},
1564	[IPSET_CMD_FLUSH]	= {
1565		.call		= ip_set_flush,
1566		.attr_count	= IPSET_ATTR_CMD_MAX,
1567		.policy		= ip_set_setname_policy,
1568	},
1569	[IPSET_CMD_RENAME]	= {
1570		.call		= ip_set_rename,
1571		.attr_count	= IPSET_ATTR_CMD_MAX,
1572		.policy		= ip_set_setname2_policy,
1573	},
1574	[IPSET_CMD_SWAP]	= {
1575		.call		= ip_set_swap,
1576		.attr_count	= IPSET_ATTR_CMD_MAX,
1577		.policy		= ip_set_setname2_policy,
1578	},
1579	[IPSET_CMD_LIST]	= {
1580		.call		= ip_set_dump,
1581		.attr_count	= IPSET_ATTR_CMD_MAX,
1582		.policy		= ip_set_setname_policy,
1583	},
1584	[IPSET_CMD_SAVE]	= {
1585		.call		= ip_set_dump,
1586		.attr_count	= IPSET_ATTR_CMD_MAX,
1587		.policy		= ip_set_setname_policy,
1588	},
1589	[IPSET_CMD_ADD]	= {
1590		.call		= ip_set_uadd,
1591		.attr_count	= IPSET_ATTR_CMD_MAX,
1592		.policy		= ip_set_adt_policy,
1593	},
1594	[IPSET_CMD_DEL]	= {
1595		.call		= ip_set_udel,
1596		.attr_count	= IPSET_ATTR_CMD_MAX,
1597		.policy		= ip_set_adt_policy,
1598	},
1599	[IPSET_CMD_TEST]	= {
1600		.call		= ip_set_utest,
1601		.attr_count	= IPSET_ATTR_CMD_MAX,
1602		.policy		= ip_set_adt_policy,
1603	},
1604	[IPSET_CMD_HEADER]	= {
1605		.call		= ip_set_header,
1606		.attr_count	= IPSET_ATTR_CMD_MAX,
1607		.policy		= ip_set_setname_policy,
1608	},
1609	[IPSET_CMD_TYPE]	= {
1610		.call		= ip_set_type,
1611		.attr_count	= IPSET_ATTR_CMD_MAX,
1612		.policy		= ip_set_type_policy,
1613	},
1614	[IPSET_CMD_PROTOCOL]	= {
1615		.call		= ip_set_protocol,
1616		.attr_count	= IPSET_ATTR_CMD_MAX,
1617		.policy		= ip_set_protocol_policy,
1618	},
1619};
1620
1621static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
1622	.name		= "ip_set",
1623	.subsys_id	= NFNL_SUBSYS_IPSET,
1624	.cb_count	= IPSET_MSG_MAX,
1625	.cb		= ip_set_netlink_subsys_cb,
1626};
1627
1628/* Interface to iptables/ip6tables */
1629
1630static int
1631ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1632{
1633	unsigned int *op;
1634	void *data;
1635	int copylen = *len, ret = 0;
1636
1637	if (!capable(CAP_NET_ADMIN))
1638		return -EPERM;
1639	if (optval != SO_IP_SET)
1640		return -EBADF;
1641	if (*len < sizeof(unsigned int))
1642		return -EINVAL;
1643
1644	data = vmalloc(*len);
1645	if (!data)
1646		return -ENOMEM;
1647	if (copy_from_user(data, user, *len) != 0) {
1648		ret = -EFAULT;
1649		goto done;
1650	}
1651	op = (unsigned int *) data;
1652
1653	if (*op < IP_SET_OP_VERSION) {
1654		/* Check the version at the beginning of operations */
1655		struct ip_set_req_version *req_version = data;
1656		if (req_version->version != IPSET_PROTOCOL) {
1657			ret = -EPROTO;
1658			goto done;
1659		}
1660	}
1661
1662	switch (*op) {
1663	case IP_SET_OP_VERSION: {
1664		struct ip_set_req_version *req_version = data;
1665
1666		if (*len != sizeof(struct ip_set_req_version)) {
1667			ret = -EINVAL;
1668			goto done;
1669		}
1670
1671		req_version->version = IPSET_PROTOCOL;
1672		ret = copy_to_user(user, req_version,
1673				   sizeof(struct ip_set_req_version));
1674		goto done;
1675	}
1676	case IP_SET_OP_GET_BYNAME: {
1677		struct ip_set_req_get_set *req_get = data;
1678
1679		if (*len != sizeof(struct ip_set_req_get_set)) {
1680			ret = -EINVAL;
1681			goto done;
1682		}
1683		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
1684		nfnl_lock();
1685		req_get->set.index = find_set_id(req_get->set.name);
1686		nfnl_unlock();
1687		goto copy;
1688	}
1689	case IP_SET_OP_GET_BYINDEX: {
1690		struct ip_set_req_get_set *req_get = data;
1691
1692		if (*len != sizeof(struct ip_set_req_get_set) ||
1693		    req_get->set.index >= ip_set_max) {
1694			ret = -EINVAL;
1695			goto done;
1696		}
1697		nfnl_lock();
1698		strncpy(req_get->set.name,
1699			ip_set_list[req_get->set.index]
1700				? ip_set_list[req_get->set.index]->name : "",
1701			IPSET_MAXNAMELEN);
1702		nfnl_unlock();
1703		goto copy;
1704	}
1705	default:
1706		ret = -EBADMSG;
1707		goto done;
1708	}	/* end of switch(op) */
1709
1710copy:
1711	ret = copy_to_user(user, data, copylen);
1712
1713done:
1714	vfree(data);
1715	if (ret > 0)
1716		ret = 0;
1717	return ret;
1718}
1719
1720static struct nf_sockopt_ops so_set __read_mostly = {
1721	.pf		= PF_INET,
1722	.get_optmin	= SO_IP_SET,
1723	.get_optmax	= SO_IP_SET + 1,
1724	.get		= &ip_set_sockfn_get,
1725	.owner		= THIS_MODULE,
1726};
1727
1728static int __init
1729ip_set_init(void)
1730{
1731	int ret;
1732
1733	if (max_sets)
1734		ip_set_max = max_sets;
1735	if (ip_set_max >= IPSET_INVALID_ID)
1736		ip_set_max = IPSET_INVALID_ID - 1;
1737
1738	ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max,
1739			      GFP_KERNEL);
1740	if (!ip_set_list)
1741		return -ENOMEM;
1742
1743	ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
1744	if (ret != 0) {
1745		pr_err("ip_set: cannot register with nfnetlink.\n");
1746		kfree(ip_set_list);
1747		return ret;
1748	}
1749	ret = nf_register_sockopt(&so_set);
1750	if (ret != 0) {
1751		pr_err("SO_SET registry failed: %d\n", ret);
1752		nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1753		kfree(ip_set_list);
1754		return ret;
1755	}
1756
1757	pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
1758	return 0;
1759}
1760
1761static void __exit
1762ip_set_fini(void)
1763{
1764	/* There can't be any existing set */
1765	nf_unregister_sockopt(&so_set);
1766	nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1767	kfree(ip_set_list);
1768	pr_debug("these are the famous last words\n");
1769}
1770
/* Module entry and exit points */
module_init(ip_set_init);
module_exit(ip_set_fini);
1773