ip_set_core.c revision 6604271c5bc658a6067ed0c3deba4d89e0e50382
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 *                         Patrick Schaaf <bof@bof.de>
3 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10/* Kernel module for IP set management */
11
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/moduleparam.h>
15#include <linux/ip.h>
16#include <linux/skbuff.h>
17#include <linux/spinlock.h>
18#include <linux/netlink.h>
19#include <linux/rculist.h>
20#include <linux/version.h>
21#include <net/netlink.h>
22
23#include <linux/netfilter.h>
24#include <linux/netfilter/nfnetlink.h>
25#include <linux/netfilter/ipset/ip_set.h>
26
27static LIST_HEAD(ip_set_type_list);		/* all registered set types */
28static DEFINE_MUTEX(ip_set_type_mutex);		/* protects ip_set_type_list */
29static DEFINE_RWLOCK(ip_set_ref_lock);		/* protects the set refs */
30
31static struct ip_set **ip_set_list;		/* all individual sets */
32static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
33
34#define STREQ(a, b)	(strncmp(a, b, IPSET_MAXNAMELEN) == 0)
35
36static unsigned int max_sets;
37
38module_param(max_sets, int, 0600);
39MODULE_PARM_DESC(max_sets, "maximal number of sets");
40MODULE_LICENSE("GPL");
41MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
42MODULE_DESCRIPTION("core IP set support");
43MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
44
45/*
46 * The set types are implemented in modules and registered set types
47 * can be found in ip_set_type_list. Adding/deleting types is
48 * serialized by ip_set_type_mutex.
49 */
50
51static inline void
52ip_set_type_lock(void)
53{
54	mutex_lock(&ip_set_type_mutex);
55}
56
57static inline void
58ip_set_type_unlock(void)
59{
60	mutex_unlock(&ip_set_type_mutex);
61}
62
63/* Register and deregister settype */
64
65static struct ip_set_type *
66find_set_type(const char *name, u8 family, u8 revision)
67{
68	struct ip_set_type *type;
69
70	list_for_each_entry_rcu(type, &ip_set_type_list, list)
71		if (STREQ(type->name, name) &&
72		    (type->family == family || type->family == AF_UNSPEC) &&
73		    type->revision == revision)
74			return type;
75	return NULL;
76}
77
78/* Unlock, try to load a set type module and lock again */
79static int
80try_to_load_type(const char *name)
81{
82	nfnl_unlock();
83	pr_debug("try to load ip_set_%s\n", name);
84	if (request_module("ip_set_%s", name) < 0) {
85		pr_warning("Can't find ip_set type %s\n", name);
86		nfnl_lock();
87		return -IPSET_ERR_FIND_TYPE;
88	}
89	nfnl_lock();
90	return -EAGAIN;
91}
92
93/* Find a set type and reference it */
94static int
95find_set_type_get(const char *name, u8 family, u8 revision,
96		  struct ip_set_type **found)
97{
98	struct ip_set_type *type;
99	int err;
100
101	rcu_read_lock();
102	*found = find_set_type(name, family, revision);
103	if (*found) {
104		err = !try_module_get((*found)->me) ? -EFAULT : 0;
105		goto unlock;
106	}
107	/* Make sure the type is loaded but we don't support the revision */
108	list_for_each_entry_rcu(type, &ip_set_type_list, list)
109		if (STREQ(type->name, name)) {
110			err = -IPSET_ERR_FIND_TYPE;
111			goto unlock;
112		}
113	rcu_read_unlock();
114
115	return try_to_load_type(name);
116
117unlock:
118	rcu_read_unlock();
119	return err;
120}
121
122/* Find a given set type by name and family.
123 * If we succeeded, the supported minimal and maximum revisions are
124 * filled out.
125 */
126static int
127find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max)
128{
129	struct ip_set_type *type;
130	bool found = false;
131
132	*min = 255; *max = 0;
133	rcu_read_lock();
134	list_for_each_entry_rcu(type, &ip_set_type_list, list)
135		if (STREQ(type->name, name) &&
136		    (type->family == family || type->family == AF_UNSPEC)) {
137			found = true;
138			if (type->revision < *min)
139				*min = type->revision;
140			if (type->revision > *max)
141				*max = type->revision;
142		}
143	rcu_read_unlock();
144	if (found)
145		return 0;
146
147	return try_to_load_type(name);
148}
149
150#define family_name(f)	((f) == AF_INET ? "inet" : \
151			 (f) == AF_INET6 ? "inet6" : "any")
152
153/* Register a set type structure. The type is identified by
154 * the unique triple of name, family and revision.
155 */
156int
157ip_set_type_register(struct ip_set_type *type)
158{
159	int ret = 0;
160
161	if (type->protocol != IPSET_PROTOCOL) {
162		pr_warning("ip_set type %s, family %s, revision %u uses "
163			   "wrong protocol version %u (want %u)\n",
164			   type->name, family_name(type->family),
165			   type->revision, type->protocol, IPSET_PROTOCOL);
166		return -EINVAL;
167	}
168
169	ip_set_type_lock();
170	if (find_set_type(type->name, type->family, type->revision)) {
171		/* Duplicate! */
172		pr_warning("ip_set type %s, family %s, revision %u "
173			   "already registered!\n", type->name,
174			   family_name(type->family), type->revision);
175		ret = -EINVAL;
176		goto unlock;
177	}
178	list_add_rcu(&type->list, &ip_set_type_list);
179	pr_debug("type %s, family %s, revision %u registered.\n",
180		 type->name, family_name(type->family), type->revision);
181unlock:
182	ip_set_type_unlock();
183	return ret;
184}
185EXPORT_SYMBOL_GPL(ip_set_type_register);
186
187/* Unregister a set type. There's a small race with ip_set_create */
188void
189ip_set_type_unregister(struct ip_set_type *type)
190{
191	ip_set_type_lock();
192	if (!find_set_type(type->name, type->family, type->revision)) {
193		pr_warning("ip_set type %s, family %s, revision %u "
194			   "not registered\n", type->name,
195			   family_name(type->family), type->revision);
196		goto unlock;
197	}
198	list_del_rcu(&type->list);
199	pr_debug("type %s, family %s, revision %u unregistered.\n",
200		 type->name, family_name(type->family), type->revision);
201unlock:
202	ip_set_type_unlock();
203
204	synchronize_rcu();
205}
206EXPORT_SYMBOL_GPL(ip_set_type_unregister);
207
208/* Utility functions */
209void *
210ip_set_alloc(size_t size)
211{
212	void *members = NULL;
213
214	if (size < KMALLOC_MAX_SIZE)
215		members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
216
217	if (members) {
218		pr_debug("%p: allocated with kmalloc\n", members);
219		return members;
220	}
221
222	members = vzalloc(size);
223	if (!members)
224		return NULL;
225	pr_debug("%p: allocated with vmalloc\n", members);
226
227	return members;
228}
229EXPORT_SYMBOL_GPL(ip_set_alloc);
230
231void
232ip_set_free(void *members)
233{
234	pr_debug("%p: free with %s\n", members,
235		 is_vmalloc_addr(members) ? "vfree" : "kfree");
236	if (is_vmalloc_addr(members))
237		vfree(members);
238	else
239		kfree(members);
240}
241EXPORT_SYMBOL_GPL(ip_set_free);
242
243static inline bool
244flag_nested(const struct nlattr *nla)
245{
246	return nla->nla_type & NLA_F_NESTED;
247}
248
249static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
250	[IPSET_ATTR_IPADDR_IPV4]	= { .type = NLA_U32 },
251	[IPSET_ATTR_IPADDR_IPV6]	= { .type = NLA_BINARY,
252					    .len = sizeof(struct in6_addr) },
253};
254
255int
256ip_set_get_ipaddr4(struct nlattr *nla,  __be32 *ipaddr)
257{
258	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
259
260	if (unlikely(!flag_nested(nla)))
261		return -IPSET_ERR_PROTOCOL;
262	if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
263		return -IPSET_ERR_PROTOCOL;
264	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
265		return -IPSET_ERR_PROTOCOL;
266
267	*ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
268	return 0;
269}
270EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
271
272int
273ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
274{
275	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
276
277	if (unlikely(!flag_nested(nla)))
278		return -IPSET_ERR_PROTOCOL;
279
280	if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
281		return -IPSET_ERR_PROTOCOL;
282	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
283		return -IPSET_ERR_PROTOCOL;
284
285	memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
286		sizeof(struct in6_addr));
287	return 0;
288}
289EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
290
291/*
292 * Creating/destroying/renaming/swapping affect the existence and
293 * the properties of a set. All of these can be executed from userspace
294 * only and serialized by the nfnl mutex indirectly from nfnetlink.
295 *
296 * Sets are identified by their index in ip_set_list and the index
297 * is used by the external references (set/SET netfilter modules).
298 *
299 * The set behind an index may change by swapping only, from userspace.
300 */
301
302static inline void
303__ip_set_get(ip_set_id_t index)
304{
305	write_lock_bh(&ip_set_ref_lock);
306	ip_set_list[index]->ref++;
307	write_unlock_bh(&ip_set_ref_lock);
308}
309
310static inline void
311__ip_set_put(ip_set_id_t index)
312{
313	write_lock_bh(&ip_set_ref_lock);
314	BUG_ON(ip_set_list[index]->ref == 0);
315	ip_set_list[index]->ref--;
316	write_unlock_bh(&ip_set_ref_lock);
317}
318
319/*
320 * Add, del and test set entries from kernel.
321 *
322 * The set behind the index must exist and must be referenced
323 * so it can't be destroyed (or changed) under our foot.
324 */
325
326int
327ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
328	    u8 family, u8 dim, u8 flags)
329{
330	struct ip_set *set = ip_set_list[index];
331	int ret = 0;
332
333	BUG_ON(set == NULL);
334	pr_debug("set %s, index %u\n", set->name, index);
335
336	if (dim < set->type->dimension ||
337	    !(family == set->family || set->family == AF_UNSPEC))
338		return 0;
339
340	read_lock_bh(&set->lock);
341	ret = set->variant->kadt(set, skb, IPSET_TEST, family, dim, flags);
342	read_unlock_bh(&set->lock);
343
344	if (ret == -EAGAIN) {
345		/* Type requests element to be completed */
346		pr_debug("element must be competed, ADD is triggered\n");
347		write_lock_bh(&set->lock);
348		set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags);
349		write_unlock_bh(&set->lock);
350		ret = 1;
351	}
352
353	/* Convert error codes to nomatch */
354	return (ret < 0 ? 0 : ret);
355}
356EXPORT_SYMBOL_GPL(ip_set_test);
357
358int
359ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
360	   u8 family, u8 dim, u8 flags)
361{
362	struct ip_set *set = ip_set_list[index];
363	int ret;
364
365	BUG_ON(set == NULL);
366	pr_debug("set %s, index %u\n", set->name, index);
367
368	if (dim < set->type->dimension ||
369	    !(family == set->family || set->family == AF_UNSPEC))
370		return 0;
371
372	write_lock_bh(&set->lock);
373	ret = set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags);
374	write_unlock_bh(&set->lock);
375
376	return ret;
377}
378EXPORT_SYMBOL_GPL(ip_set_add);
379
380int
381ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
382	   u8 family, u8 dim, u8 flags)
383{
384	struct ip_set *set = ip_set_list[index];
385	int ret = 0;
386
387	BUG_ON(set == NULL);
388	pr_debug("set %s, index %u\n", set->name, index);
389
390	if (dim < set->type->dimension ||
391	    !(family == set->family || set->family == AF_UNSPEC))
392		return 0;
393
394	write_lock_bh(&set->lock);
395	ret = set->variant->kadt(set, skb, IPSET_DEL, family, dim, flags);
396	write_unlock_bh(&set->lock);
397
398	return ret;
399}
400EXPORT_SYMBOL_GPL(ip_set_del);
401
402/*
403 * Find set by name, reference it once. The reference makes sure the
404 * thing pointed to, does not go away under our feet.
405 *
406 */
407ip_set_id_t
408ip_set_get_byname(const char *name, struct ip_set **set)
409{
410	ip_set_id_t i, index = IPSET_INVALID_ID;
411	struct ip_set *s;
412
413	for (i = 0; i < ip_set_max; i++) {
414		s = ip_set_list[i];
415		if (s != NULL && STREQ(s->name, name)) {
416			__ip_set_get(i);
417			index = i;
418			*set = s;
419		}
420	}
421
422	return index;
423}
424EXPORT_SYMBOL_GPL(ip_set_get_byname);
425
426/*
427 * If the given set pointer points to a valid set, decrement
428 * reference count by 1. The caller shall not assume the index
429 * to be valid, after calling this function.
430 *
431 */
432void
433ip_set_put_byindex(ip_set_id_t index)
434{
435	if (ip_set_list[index] != NULL)
436		__ip_set_put(index);
437}
438EXPORT_SYMBOL_GPL(ip_set_put_byindex);
439
440/*
441 * Get the name of a set behind a set index.
442 * We assume the set is referenced, so it does exist and
443 * can't be destroyed. The set cannot be renamed due to
444 * the referencing either.
445 *
446 */
447const char *
448ip_set_name_byindex(ip_set_id_t index)
449{
450	const struct ip_set *set = ip_set_list[index];
451
452	BUG_ON(set == NULL);
453	BUG_ON(set->ref == 0);
454
455	/* Referenced, so it's safe */
456	return set->name;
457}
458EXPORT_SYMBOL_GPL(ip_set_name_byindex);
459
460/*
461 * Routines to call by external subsystems, which do not
462 * call nfnl_lock for us.
463 */
464
465/*
466 * Find set by name, reference it once. The reference makes sure the
467 * thing pointed to, does not go away under our feet.
468 *
469 * The nfnl mutex is used in the function.
470 */
471ip_set_id_t
472ip_set_nfnl_get(const char *name)
473{
474	struct ip_set *s;
475	ip_set_id_t index;
476
477	nfnl_lock();
478	index = ip_set_get_byname(name, &s);
479	nfnl_unlock();
480
481	return index;
482}
483EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
484
485/*
486 * Find set by index, reference it once. The reference makes sure the
487 * thing pointed to, does not go away under our feet.
488 *
489 * The nfnl mutex is used in the function.
490 */
491ip_set_id_t
492ip_set_nfnl_get_byindex(ip_set_id_t index)
493{
494	if (index > ip_set_max)
495		return IPSET_INVALID_ID;
496
497	nfnl_lock();
498	if (ip_set_list[index])
499		__ip_set_get(index);
500	else
501		index = IPSET_INVALID_ID;
502	nfnl_unlock();
503
504	return index;
505}
506EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
507
508/*
509 * If the given set pointer points to a valid set, decrement
510 * reference count by 1. The caller shall not assume the index
511 * to be valid, after calling this function.
512 *
513 * The nfnl mutex is used in the function.
514 */
515void
516ip_set_nfnl_put(ip_set_id_t index)
517{
518	nfnl_lock();
519	ip_set_put_byindex(index);
520	nfnl_unlock();
521}
522EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
523
524/*
525 * Communication protocol with userspace over netlink.
526 *
527 * The commands are serialized by the nfnl mutex.
528 */
529
530static inline bool
531protocol_failed(const struct nlattr * const tb[])
532{
533	return !tb[IPSET_ATTR_PROTOCOL] ||
534	       nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
535}
536
537static inline u32
538flag_exist(const struct nlmsghdr *nlh)
539{
540	return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
541}
542
543static struct nlmsghdr *
544start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags,
545	  enum ipset_cmd cmd)
546{
547	struct nlmsghdr *nlh;
548	struct nfgenmsg *nfmsg;
549
550	nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
551			sizeof(*nfmsg), flags);
552	if (nlh == NULL)
553		return NULL;
554
555	nfmsg = nlmsg_data(nlh);
556	nfmsg->nfgen_family = AF_INET;
557	nfmsg->version = NFNETLINK_V0;
558	nfmsg->res_id = 0;
559
560	return nlh;
561}
562
563/* Create a set */
564
565static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
566	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
567	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
568				    .len = IPSET_MAXNAMELEN - 1 },
569	[IPSET_ATTR_TYPENAME]	= { .type = NLA_NUL_STRING,
570				    .len = IPSET_MAXNAMELEN - 1},
571	[IPSET_ATTR_REVISION]	= { .type = NLA_U8 },
572	[IPSET_ATTR_FAMILY]	= { .type = NLA_U8 },
573	[IPSET_ATTR_DATA]	= { .type = NLA_NESTED },
574};
575
576static ip_set_id_t
577find_set_id(const char *name)
578{
579	ip_set_id_t i, index = IPSET_INVALID_ID;
580	const struct ip_set *set;
581
582	for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) {
583		set = ip_set_list[i];
584		if (set != NULL && STREQ(set->name, name))
585			index = i;
586	}
587	return index;
588}
589
590static inline struct ip_set *
591find_set(const char *name)
592{
593	ip_set_id_t index = find_set_id(name);
594
595	return index == IPSET_INVALID_ID ? NULL : ip_set_list[index];
596}
597
598static int
599find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
600{
601	ip_set_id_t i;
602
603	*index = IPSET_INVALID_ID;
604	for (i = 0;  i < ip_set_max; i++) {
605		if (ip_set_list[i] == NULL) {
606			if (*index == IPSET_INVALID_ID)
607				*index = i;
608		} else if (STREQ(name, ip_set_list[i]->name)) {
609			/* Name clash */
610			*set = ip_set_list[i];
611			return -EEXIST;
612		}
613	}
614	if (*index == IPSET_INVALID_ID)
615		/* No free slot remained */
616		return -IPSET_ERR_MAX_SETS;
617	return 0;
618}
619
620static int
621ip_set_create(struct sock *ctnl, struct sk_buff *skb,
622	      const struct nlmsghdr *nlh,
623	      const struct nlattr * const attr[])
624{
625	struct ip_set *set, *clash = NULL;
626	ip_set_id_t index = IPSET_INVALID_ID;
627	struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
628	const char *name, *typename;
629	u8 family, revision;
630	u32 flags = flag_exist(nlh);
631	int ret = 0;
632
633	if (unlikely(protocol_failed(attr) ||
634		     attr[IPSET_ATTR_SETNAME] == NULL ||
635		     attr[IPSET_ATTR_TYPENAME] == NULL ||
636		     attr[IPSET_ATTR_REVISION] == NULL ||
637		     attr[IPSET_ATTR_FAMILY] == NULL ||
638		     (attr[IPSET_ATTR_DATA] != NULL &&
639		      !flag_nested(attr[IPSET_ATTR_DATA]))))
640		return -IPSET_ERR_PROTOCOL;
641
642	name = nla_data(attr[IPSET_ATTR_SETNAME]);
643	typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
644	family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
645	revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
646	pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
647		 name, typename, family_name(family), revision);
648
649	/*
650	 * First, and without any locks, allocate and initialize
651	 * a normal base set structure.
652	 */
653	set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
654	if (!set)
655		return -ENOMEM;
656	rwlock_init(&set->lock);
657	strlcpy(set->name, name, IPSET_MAXNAMELEN);
658	set->family = family;
659
660	/*
661	 * Next, check that we know the type, and take
662	 * a reference on the type, to make sure it stays available
663	 * while constructing our new set.
664	 *
665	 * After referencing the type, we try to create the type
666	 * specific part of the set without holding any locks.
667	 */
668	ret = find_set_type_get(typename, family, revision, &(set->type));
669	if (ret)
670		goto out;
671
672	/*
673	 * Without holding any locks, create private part.
674	 */
675	if (attr[IPSET_ATTR_DATA] &&
676	    nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
677			     set->type->create_policy)) {
678	    	ret = -IPSET_ERR_PROTOCOL;
679	    	goto put_out;
680	}
681
682	ret = set->type->create(set, tb, flags);
683	if (ret != 0)
684		goto put_out;
685
686	/* BTW, ret==0 here. */
687
688	/*
689	 * Here, we have a valid, constructed set and we are protected
690	 * by the nfnl mutex. Find the first free index in ip_set_list
691	 * and check clashing.
692	 */
693	if ((ret = find_free_id(set->name, &index, &clash)) != 0) {
694		/* If this is the same set and requested, ignore error */
695		if (ret == -EEXIST &&
696		    (flags & IPSET_FLAG_EXIST) &&
697		    STREQ(set->type->name, clash->type->name) &&
698		    set->type->family == clash->type->family &&
699		    set->type->revision == clash->type->revision &&
700		    set->variant->same_set(set, clash))
701			ret = 0;
702		goto cleanup;
703	}
704
705	/*
706	 * Finally! Add our shiny new set to the list, and be done.
707	 */
708	pr_debug("create: '%s' created with index %u!\n", set->name, index);
709	ip_set_list[index] = set;
710
711	return ret;
712
713cleanup:
714	set->variant->destroy(set);
715put_out:
716	module_put(set->type->me);
717out:
718	kfree(set);
719	return ret;
720}
721
722/* Destroy sets */
723
724static const struct nla_policy
725ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
726	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
727	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
728				    .len = IPSET_MAXNAMELEN - 1 },
729};
730
731static void
732ip_set_destroy_set(ip_set_id_t index)
733{
734	struct ip_set *set = ip_set_list[index];
735
736	pr_debug("set: %s\n",  set->name);
737	ip_set_list[index] = NULL;
738
739	/* Must call it without holding any lock */
740	set->variant->destroy(set);
741	module_put(set->type->me);
742	kfree(set);
743}
744
745static int
746ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
747	       const struct nlmsghdr *nlh,
748	       const struct nlattr * const attr[])
749{
750	ip_set_id_t i;
751	int ret = 0;
752
753	if (unlikely(protocol_failed(attr)))
754		return -IPSET_ERR_PROTOCOL;
755
756	/* Commands are serialized and references are
757	 * protected by the ip_set_ref_lock.
758	 * External systems (i.e. xt_set) must call
759	 * ip_set_put|get_nfnl_* functions, that way we
760	 * can safely check references here.
761	 *
762	 * list:set timer can only decrement the reference
763	 * counter, so if it's already zero, we can proceed
764	 * without holding the lock.
765	 */
766	read_lock_bh(&ip_set_ref_lock);
767	if (!attr[IPSET_ATTR_SETNAME]) {
768		for (i = 0; i < ip_set_max; i++) {
769			if (ip_set_list[i] != NULL && ip_set_list[i]->ref) {
770				ret = IPSET_ERR_BUSY;
771				goto out;
772			}
773		}
774		read_unlock_bh(&ip_set_ref_lock);
775		for (i = 0; i < ip_set_max; i++) {
776			if (ip_set_list[i] != NULL)
777				ip_set_destroy_set(i);
778		}
779	} else {
780		i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
781		if (i == IPSET_INVALID_ID) {
782			ret = -ENOENT;
783			goto out;
784		} else if (ip_set_list[i]->ref) {
785			ret = -IPSET_ERR_BUSY;
786			goto out;
787		}
788		read_unlock_bh(&ip_set_ref_lock);
789
790		ip_set_destroy_set(i);
791	}
792	return 0;
793out:
794	read_unlock_bh(&ip_set_ref_lock);
795	return ret;
796}
797
798/* Flush sets */
799
800static void
801ip_set_flush_set(struct ip_set *set)
802{
803	pr_debug("set: %s\n",  set->name);
804
805	write_lock_bh(&set->lock);
806	set->variant->flush(set);
807	write_unlock_bh(&set->lock);
808}
809
810static int
811ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
812	     const struct nlmsghdr *nlh,
813	     const struct nlattr * const attr[])
814{
815	ip_set_id_t i;
816
817	if (unlikely(protocol_failed(attr)))
818		return -EPROTO;
819
820	if (!attr[IPSET_ATTR_SETNAME]) {
821		for (i = 0; i < ip_set_max; i++)
822			if (ip_set_list[i] != NULL)
823				ip_set_flush_set(ip_set_list[i]);
824	} else {
825		i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
826		if (i == IPSET_INVALID_ID)
827			return -ENOENT;
828
829		ip_set_flush_set(ip_set_list[i]);
830	}
831
832	return 0;
833}
834
835/* Rename a set */
836
837static const struct nla_policy
838ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
839	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
840	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
841				    .len = IPSET_MAXNAMELEN - 1 },
842	[IPSET_ATTR_SETNAME2]	= { .type = NLA_NUL_STRING,
843				    .len = IPSET_MAXNAMELEN - 1 },
844};
845
846static int
847ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
848	      const struct nlmsghdr *nlh,
849	      const struct nlattr * const attr[])
850{
851	struct ip_set *set;
852	const char *name2;
853	ip_set_id_t i;
854	int ret = 0;
855
856	if (unlikely(protocol_failed(attr) ||
857		     attr[IPSET_ATTR_SETNAME] == NULL ||
858		     attr[IPSET_ATTR_SETNAME2] == NULL))
859		return -IPSET_ERR_PROTOCOL;
860
861	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
862	if (set == NULL)
863		return -ENOENT;
864
865	read_lock_bh(&ip_set_ref_lock);
866	if (set->ref != 0) {
867		ret = -IPSET_ERR_REFERENCED;
868		goto out;
869	}
870
871	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
872	for (i = 0; i < ip_set_max; i++) {
873		if (ip_set_list[i] != NULL &&
874		    STREQ(ip_set_list[i]->name, name2)) {
875			ret = -IPSET_ERR_EXIST_SETNAME2;
876			goto out;
877		}
878	}
879	strncpy(set->name, name2, IPSET_MAXNAMELEN);
880
881out:
882	read_unlock_bh(&ip_set_ref_lock);
883	return ret;
884}
885
886/* Swap two sets so that name/index points to the other.
887 * References and set names are also swapped.
888 *
889 * The commands are serialized by the nfnl mutex and references are
890 * protected by the ip_set_ref_lock. The kernel interfaces
891 * do not hold the mutex but the pointer settings are atomic
892 * so the ip_set_list always contains valid pointers to the sets.
893 */
894
895static int
896ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
897	    const struct nlmsghdr *nlh,
898	    const struct nlattr * const attr[])
899{
900	struct ip_set *from, *to;
901	ip_set_id_t from_id, to_id;
902	char from_name[IPSET_MAXNAMELEN];
903
904	if (unlikely(protocol_failed(attr) ||
905		     attr[IPSET_ATTR_SETNAME] == NULL ||
906		     attr[IPSET_ATTR_SETNAME2] == NULL))
907		return -IPSET_ERR_PROTOCOL;
908
909	from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
910	if (from_id == IPSET_INVALID_ID)
911		return -ENOENT;
912
913	to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2]));
914	if (to_id == IPSET_INVALID_ID)
915		return -IPSET_ERR_EXIST_SETNAME2;
916
917	from = ip_set_list[from_id];
918	to = ip_set_list[to_id];
919
920	/* Features must not change.
921	 * Not an artifical restriction anymore, as we must prevent
922	 * possible loops created by swapping in setlist type of sets. */
923	if (!(from->type->features == to->type->features &&
924	      from->type->family == to->type->family))
925		return -IPSET_ERR_TYPE_MISMATCH;
926
927	strncpy(from_name, from->name, IPSET_MAXNAMELEN);
928	strncpy(from->name, to->name, IPSET_MAXNAMELEN);
929	strncpy(to->name, from_name, IPSET_MAXNAMELEN);
930
931	write_lock_bh(&ip_set_ref_lock);
932	swap(from->ref, to->ref);
933	ip_set_list[from_id] = to;
934	ip_set_list[to_id] = from;
935	write_unlock_bh(&ip_set_ref_lock);
936
937	return 0;
938}
939
940/* List/save set data */
941
942#define DUMP_INIT	0L
943#define DUMP_ALL	1L
944#define DUMP_ONE	2L
945#define DUMP_LAST	3L
946
947static int
948ip_set_dump_done(struct netlink_callback *cb)
949{
950	if (cb->args[2]) {
951		pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name);
952		ip_set_put_byindex((ip_set_id_t) cb->args[1]);
953	}
954	return 0;
955}
956
957static inline void
958dump_attrs(struct nlmsghdr *nlh)
959{
960	const struct nlattr *attr;
961	int rem;
962
963	pr_debug("dump nlmsg\n");
964	nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
965		pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
966	}
967}
968
969static int
970dump_init(struct netlink_callback *cb)
971{
972	struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
973	int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
974	struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
975	struct nlattr *attr = (void *)nlh + min_len;
976	ip_set_id_t index;
977
978	/* Second pass, so parser can't fail */
979	nla_parse(cda, IPSET_ATTR_CMD_MAX,
980		  attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
981
982	/* cb->args[0] : dump single set/all sets
983	 *         [1] : set index
984	 *         [..]: type specific
985	 */
986
987	if (!cda[IPSET_ATTR_SETNAME]) {
988		cb->args[0] = DUMP_ALL;
989		return 0;
990	}
991
992	index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME]));
993	if (index == IPSET_INVALID_ID)
994		return -ENOENT;
995
996	cb->args[0] = DUMP_ONE;
997	cb->args[1] = index;
998	return 0;
999}
1000
1001static int
1002ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
1003{
1004	ip_set_id_t index = IPSET_INVALID_ID, max;
1005	struct ip_set *set = NULL;
1006	struct nlmsghdr *nlh = NULL;
1007	unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0;
1008	int ret = 0;
1009
1010	if (cb->args[0] == DUMP_INIT) {
1011		ret = dump_init(cb);
1012		if (ret < 0) {
1013			nlh = nlmsg_hdr(cb->skb);
1014			/* We have to create and send the error message
1015			 * manually :-( */
1016			if (nlh->nlmsg_flags & NLM_F_ACK)
1017				netlink_ack(cb->skb, nlh, ret);
1018			return ret;
1019		}
1020	}
1021
1022	if (cb->args[1] >= ip_set_max)
1023		goto out;
1024
1025	pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]);
1026	max = cb->args[0] == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
1027	for (; cb->args[1] < max; cb->args[1]++) {
1028		index = (ip_set_id_t) cb->args[1];
1029		set = ip_set_list[index];
1030		if (set == NULL) {
1031			if (cb->args[0] == DUMP_ONE) {
1032				ret = -ENOENT;
1033				goto out;
1034			}
1035			continue;
1036		}
1037		/* When dumping all sets, we must dump "sorted"
1038		 * so that lists (unions of sets) are dumped last.
1039		 */
1040		if (cb->args[0] != DUMP_ONE &&
1041		    !((cb->args[0] == DUMP_ALL) ^
1042		      (set->type->features & IPSET_DUMP_LAST)))
1043			continue;
1044		pr_debug("List set: %s\n", set->name);
1045		if (!cb->args[2]) {
1046			/* Start listing: make sure set won't be destroyed */
1047			pr_debug("reference set\n");
1048			__ip_set_get(index);
1049		}
1050		nlh = start_msg(skb, NETLINK_CB(cb->skb).pid,
1051				cb->nlh->nlmsg_seq, flags,
1052				IPSET_CMD_LIST);
1053		if (!nlh) {
1054			ret = -EMSGSIZE;
1055			goto release_refcount;
1056		}
1057		NLA_PUT_U8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1058		NLA_PUT_STRING(skb, IPSET_ATTR_SETNAME, set->name);
1059		switch (cb->args[2]) {
1060		case 0:
1061			/* Core header data */
1062			NLA_PUT_STRING(skb, IPSET_ATTR_TYPENAME,
1063				       set->type->name);
1064			NLA_PUT_U8(skb, IPSET_ATTR_FAMILY,
1065				   set->family);
1066			NLA_PUT_U8(skb, IPSET_ATTR_REVISION,
1067				   set->type->revision);
1068			ret = set->variant->head(set, skb);
1069			if (ret < 0)
1070				goto release_refcount;
1071			/* Fall through and add elements */
1072		default:
1073			read_lock_bh(&set->lock);
1074			ret = set->variant->list(set, skb, cb);
1075			read_unlock_bh(&set->lock);
1076			if (!cb->args[2]) {
1077				/* Set is done, proceed with next one */
1078				if (cb->args[0] == DUMP_ONE)
1079					cb->args[1] = IPSET_INVALID_ID;
1080				else
1081					cb->args[1]++;
1082			}
1083			goto release_refcount;
1084		}
1085	}
1086	goto out;
1087
1088nla_put_failure:
1089	ret = -EFAULT;
1090release_refcount:
1091	/* If there was an error or set is done, release set */
1092	if (ret || !cb->args[2]) {
1093		pr_debug("release set %s\n", ip_set_list[index]->name);
1094		ip_set_put_byindex(index);
1095	}
1096
1097	/* If we dump all sets, continue with dumping last ones */
1098	if (cb->args[0] == DUMP_ALL && cb->args[1] >= max && !cb->args[2])
1099		cb->args[0] = DUMP_LAST;
1100
1101out:
1102	if (nlh) {
1103		nlmsg_end(skb, nlh);
1104		pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
1105		dump_attrs(nlh);
1106	}
1107
1108	return ret < 0 ? ret : skb->len;
1109}
1110
1111static int
1112ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
1113	    const struct nlmsghdr *nlh,
1114	    const struct nlattr * const attr[])
1115{
1116	if (unlikely(protocol_failed(attr)))
1117		return -IPSET_ERR_PROTOCOL;
1118
1119	return netlink_dump_start(ctnl, skb, nlh,
1120				  ip_set_dump_start,
1121				  ip_set_dump_done);
1122}
1123
1124/* Add, del and test */
1125
1126static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
1127	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1128	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
1129				    .len = IPSET_MAXNAMELEN - 1 },
1130	[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
1131	[IPSET_ATTR_DATA]	= { .type = NLA_NESTED },
1132	[IPSET_ATTR_ADT]	= { .type = NLA_NESTED },
1133};
1134
1135static int
1136call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
1137	struct nlattr *tb[], enum ipset_adt adt,
1138	u32 flags, bool use_lineno)
1139{
1140	int ret, retried = 0;
1141	u32 lineno = 0;
1142	bool eexist = flags & IPSET_FLAG_EXIST;
1143
1144	do {
1145		write_lock_bh(&set->lock);
1146		ret = set->variant->uadt(set, tb, adt, &lineno, flags);
1147		write_unlock_bh(&set->lock);
1148	} while (ret == -EAGAIN &&
1149		 set->variant->resize &&
1150		 (ret = set->variant->resize(set, retried++)) == 0);
1151
1152	if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
1153		return 0;
1154	if (lineno && use_lineno) {
1155		/* Error in restore/batch mode: send back lineno */
1156		struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
1157		struct sk_buff *skb2;
1158		struct nlmsgerr *errmsg;
1159		size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
1160		int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
1161		struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1162		struct nlattr *cmdattr;
1163		u32 *errline;
1164
1165		skb2 = nlmsg_new(payload, GFP_KERNEL);
1166		if (skb2 == NULL)
1167			return -ENOMEM;
1168		rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid,
1169				  nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
1170		errmsg = nlmsg_data(rep);
1171		errmsg->error = ret;
1172		memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
1173		cmdattr = (void *)&errmsg->msg + min_len;
1174
1175		nla_parse(cda, IPSET_ATTR_CMD_MAX,
1176			  cmdattr, nlh->nlmsg_len - min_len,
1177			  ip_set_adt_policy);
1178
1179		errline = nla_data(cda[IPSET_ATTR_LINENO]);
1180
1181		*errline = lineno;
1182
1183		netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1184		/* Signal netlink not to send its ACK/errmsg.  */
1185		return -EINTR;
1186	}
1187
1188	return ret;
1189}
1190
1191static int
1192ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
1193	    const struct nlmsghdr *nlh,
1194	    const struct nlattr * const attr[])
1195{
1196	struct ip_set *set;
1197	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1198	const struct nlattr *nla;
1199	u32 flags = flag_exist(nlh);
1200	bool use_lineno;
1201	int ret = 0;
1202
1203	if (unlikely(protocol_failed(attr) ||
1204		     attr[IPSET_ATTR_SETNAME] == NULL ||
1205		     !((attr[IPSET_ATTR_DATA] != NULL) ^
1206		       (attr[IPSET_ATTR_ADT] != NULL)) ||
1207		     (attr[IPSET_ATTR_DATA] != NULL &&
1208		      !flag_nested(attr[IPSET_ATTR_DATA])) ||
1209		     (attr[IPSET_ATTR_ADT] != NULL &&
1210		      (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1211		       attr[IPSET_ATTR_LINENO] == NULL))))
1212		return -IPSET_ERR_PROTOCOL;
1213
1214	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1215	if (set == NULL)
1216		return -ENOENT;
1217
1218	use_lineno = !!attr[IPSET_ATTR_LINENO];
1219	if (attr[IPSET_ATTR_DATA]) {
1220		if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1221				     attr[IPSET_ATTR_DATA],
1222				     set->type->adt_policy))
1223			return -IPSET_ERR_PROTOCOL;
1224		ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
1225			      use_lineno);
1226	} else {
1227		int nla_rem;
1228
1229		nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1230			memset(tb, 0, sizeof(tb));
1231			if (nla_type(nla) != IPSET_ATTR_DATA ||
1232			    !flag_nested(nla) ||
1233			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1234					     set->type->adt_policy))
1235				return -IPSET_ERR_PROTOCOL;
1236			ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
1237				      flags, use_lineno);
1238			if (ret < 0)
1239				return ret;
1240		}
1241	}
1242	return ret;
1243}
1244
1245static int
1246ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
1247	    const struct nlmsghdr *nlh,
1248	    const struct nlattr * const attr[])
1249{
1250	struct ip_set *set;
1251	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1252	const struct nlattr *nla;
1253	u32 flags = flag_exist(nlh);
1254	bool use_lineno;
1255	int ret = 0;
1256
1257	if (unlikely(protocol_failed(attr) ||
1258		     attr[IPSET_ATTR_SETNAME] == NULL ||
1259		     !((attr[IPSET_ATTR_DATA] != NULL) ^
1260		       (attr[IPSET_ATTR_ADT] != NULL)) ||
1261		     (attr[IPSET_ATTR_DATA] != NULL &&
1262		      !flag_nested(attr[IPSET_ATTR_DATA])) ||
1263		     (attr[IPSET_ATTR_ADT] != NULL &&
1264		      (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1265		       attr[IPSET_ATTR_LINENO] == NULL))))
1266		return -IPSET_ERR_PROTOCOL;
1267
1268	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1269	if (set == NULL)
1270		return -ENOENT;
1271
1272	use_lineno = !!attr[IPSET_ATTR_LINENO];
1273	if (attr[IPSET_ATTR_DATA]) {
1274		if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1275				     attr[IPSET_ATTR_DATA],
1276				     set->type->adt_policy))
1277			return -IPSET_ERR_PROTOCOL;
1278		ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
1279			      use_lineno);
1280	} else {
1281		int nla_rem;
1282
1283		nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1284			memset(tb, 0, sizeof(*tb));
1285			if (nla_type(nla) != IPSET_ATTR_DATA ||
1286			    !flag_nested(nla) ||
1287			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1288					     set->type->adt_policy))
1289				return -IPSET_ERR_PROTOCOL;
1290			ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
1291				      flags, use_lineno);
1292			if (ret < 0)
1293				return ret;
1294		}
1295	}
1296	return ret;
1297}
1298
1299static int
1300ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
1301	     const struct nlmsghdr *nlh,
1302	     const struct nlattr * const attr[])
1303{
1304	struct ip_set *set;
1305	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1306	int ret = 0;
1307
1308	if (unlikely(protocol_failed(attr) ||
1309		     attr[IPSET_ATTR_SETNAME] == NULL ||
1310		     attr[IPSET_ATTR_DATA] == NULL ||
1311		     !flag_nested(attr[IPSET_ATTR_DATA])))
1312		return -IPSET_ERR_PROTOCOL;
1313
1314	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1315	if (set == NULL)
1316		return -ENOENT;
1317
1318	if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
1319			     set->type->adt_policy))
1320		return -IPSET_ERR_PROTOCOL;
1321
1322	read_lock_bh(&set->lock);
1323	ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0);
1324	read_unlock_bh(&set->lock);
1325	/* Userspace can't trigger element to be re-added */
1326	if (ret == -EAGAIN)
1327		ret = 1;
1328
1329	return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST;
1330}
1331
1332/* Get headed data of a set */
1333
1334static int
1335ip_set_header(struct sock *ctnl, struct sk_buff *skb,
1336	      const struct nlmsghdr *nlh,
1337	      const struct nlattr * const attr[])
1338{
1339	const struct ip_set *set;
1340	struct sk_buff *skb2;
1341	struct nlmsghdr *nlh2;
1342	ip_set_id_t index;
1343	int ret = 0;
1344
1345	if (unlikely(protocol_failed(attr) ||
1346		     attr[IPSET_ATTR_SETNAME] == NULL))
1347		return -IPSET_ERR_PROTOCOL;
1348
1349	index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
1350	if (index == IPSET_INVALID_ID)
1351		return -ENOENT;
1352	set = ip_set_list[index];
1353
1354	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1355	if (skb2 == NULL)
1356		return -ENOMEM;
1357
1358	nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1359			 IPSET_CMD_HEADER);
1360	if (!nlh2)
1361		goto nlmsg_failure;
1362	NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1363	NLA_PUT_STRING(skb2, IPSET_ATTR_SETNAME, set->name);
1364	NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, set->type->name);
1365	NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->family);
1366	NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->type->revision);
1367	nlmsg_end(skb2, nlh2);
1368
1369	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1370	if (ret < 0)
1371		return ret;
1372
1373	return 0;
1374
1375nla_put_failure:
1376	nlmsg_cancel(skb2, nlh2);
1377nlmsg_failure:
1378	kfree_skb(skb2);
1379	return -EMSGSIZE;
1380}
1381
1382/* Get type data */
1383
1384static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
1385	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1386	[IPSET_ATTR_TYPENAME]	= { .type = NLA_NUL_STRING,
1387				    .len = IPSET_MAXNAMELEN - 1 },
1388	[IPSET_ATTR_FAMILY]	= { .type = NLA_U8 },
1389};
1390
1391static int
1392ip_set_type(struct sock *ctnl, struct sk_buff *skb,
1393	    const struct nlmsghdr *nlh,
1394	    const struct nlattr * const attr[])
1395{
1396	struct sk_buff *skb2;
1397	struct nlmsghdr *nlh2;
1398	u8 family, min, max;
1399	const char *typename;
1400	int ret = 0;
1401
1402	if (unlikely(protocol_failed(attr) ||
1403		     attr[IPSET_ATTR_TYPENAME] == NULL ||
1404		     attr[IPSET_ATTR_FAMILY] == NULL))
1405		return -IPSET_ERR_PROTOCOL;
1406
1407	family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
1408	typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
1409	ret = find_set_type_minmax(typename, family, &min, &max);
1410	if (ret)
1411		return ret;
1412
1413	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1414	if (skb2 == NULL)
1415		return -ENOMEM;
1416
1417	nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1418			 IPSET_CMD_TYPE);
1419	if (!nlh2)
1420		goto nlmsg_failure;
1421	NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1422	NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, typename);
1423	NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, family);
1424	NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, max);
1425	NLA_PUT_U8(skb2, IPSET_ATTR_REVISION_MIN, min);
1426	nlmsg_end(skb2, nlh2);
1427
1428	pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
1429	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1430	if (ret < 0)
1431		return ret;
1432
1433	return 0;
1434
1435nla_put_failure:
1436	nlmsg_cancel(skb2, nlh2);
1437nlmsg_failure:
1438	kfree_skb(skb2);
1439	return -EMSGSIZE;
1440}
1441
1442/* Get protocol version */
1443
1444static const struct nla_policy
1445ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
1446	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1447};
1448
1449static int
1450ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
1451		const struct nlmsghdr *nlh,
1452		const struct nlattr * const attr[])
1453{
1454	struct sk_buff *skb2;
1455	struct nlmsghdr *nlh2;
1456	int ret = 0;
1457
1458	if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
1459		return -IPSET_ERR_PROTOCOL;
1460
1461	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1462	if (skb2 == NULL)
1463		return -ENOMEM;
1464
1465	nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1466			 IPSET_CMD_PROTOCOL);
1467	if (!nlh2)
1468		goto nlmsg_failure;
1469	NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1470	nlmsg_end(skb2, nlh2);
1471
1472	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1473	if (ret < 0)
1474		return ret;
1475
1476	return 0;
1477
1478nla_put_failure:
1479	nlmsg_cancel(skb2, nlh2);
1480nlmsg_failure:
1481	kfree_skb(skb2);
1482	return -EMSGSIZE;
1483}
1484
1485static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
1486	[IPSET_CMD_CREATE]	= {
1487		.call		= ip_set_create,
1488		.attr_count	= IPSET_ATTR_CMD_MAX,
1489		.policy		= ip_set_create_policy,
1490	},
1491	[IPSET_CMD_DESTROY]	= {
1492		.call		= ip_set_destroy,
1493		.attr_count	= IPSET_ATTR_CMD_MAX,
1494		.policy		= ip_set_setname_policy,
1495	},
1496	[IPSET_CMD_FLUSH]	= {
1497		.call		= ip_set_flush,
1498		.attr_count	= IPSET_ATTR_CMD_MAX,
1499		.policy		= ip_set_setname_policy,
1500	},
1501	[IPSET_CMD_RENAME]	= {
1502		.call		= ip_set_rename,
1503		.attr_count	= IPSET_ATTR_CMD_MAX,
1504		.policy		= ip_set_setname2_policy,
1505	},
1506	[IPSET_CMD_SWAP]	= {
1507		.call		= ip_set_swap,
1508		.attr_count	= IPSET_ATTR_CMD_MAX,
1509		.policy		= ip_set_setname2_policy,
1510	},
1511	[IPSET_CMD_LIST]	= {
1512		.call		= ip_set_dump,
1513		.attr_count	= IPSET_ATTR_CMD_MAX,
1514		.policy		= ip_set_setname_policy,
1515	},
1516	[IPSET_CMD_SAVE]	= {
1517		.call		= ip_set_dump,
1518		.attr_count	= IPSET_ATTR_CMD_MAX,
1519		.policy		= ip_set_setname_policy,
1520	},
1521	[IPSET_CMD_ADD]	= {
1522		.call		= ip_set_uadd,
1523		.attr_count	= IPSET_ATTR_CMD_MAX,
1524		.policy		= ip_set_adt_policy,
1525	},
1526	[IPSET_CMD_DEL]	= {
1527		.call		= ip_set_udel,
1528		.attr_count	= IPSET_ATTR_CMD_MAX,
1529		.policy		= ip_set_adt_policy,
1530	},
1531	[IPSET_CMD_TEST]	= {
1532		.call		= ip_set_utest,
1533		.attr_count	= IPSET_ATTR_CMD_MAX,
1534		.policy		= ip_set_adt_policy,
1535	},
1536	[IPSET_CMD_HEADER]	= {
1537		.call		= ip_set_header,
1538		.attr_count	= IPSET_ATTR_CMD_MAX,
1539		.policy		= ip_set_setname_policy,
1540	},
1541	[IPSET_CMD_TYPE]	= {
1542		.call		= ip_set_type,
1543		.attr_count	= IPSET_ATTR_CMD_MAX,
1544		.policy		= ip_set_type_policy,
1545	},
1546	[IPSET_CMD_PROTOCOL]	= {
1547		.call		= ip_set_protocol,
1548		.attr_count	= IPSET_ATTR_CMD_MAX,
1549		.policy		= ip_set_protocol_policy,
1550	},
1551};
1552
1553static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
1554	.name		= "ip_set",
1555	.subsys_id	= NFNL_SUBSYS_IPSET,
1556	.cb_count	= IPSET_MSG_MAX,
1557	.cb		= ip_set_netlink_subsys_cb,
1558};
1559
1560/* Interface to iptables/ip6tables */
1561
1562static int
1563ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1564{
1565	unsigned *op;
1566	void *data;
1567	int copylen = *len, ret = 0;
1568
1569	if (!capable(CAP_NET_ADMIN))
1570		return -EPERM;
1571	if (optval != SO_IP_SET)
1572		return -EBADF;
1573	if (*len < sizeof(unsigned))
1574		return -EINVAL;
1575
1576	data = vmalloc(*len);
1577	if (!data)
1578		return -ENOMEM;
1579	if (copy_from_user(data, user, *len) != 0) {
1580		ret = -EFAULT;
1581		goto done;
1582	}
1583	op = (unsigned *) data;
1584
1585	if (*op < IP_SET_OP_VERSION) {
1586		/* Check the version at the beginning of operations */
1587		struct ip_set_req_version *req_version = data;
1588		if (req_version->version != IPSET_PROTOCOL) {
1589			ret = -EPROTO;
1590			goto done;
1591		}
1592	}
1593
1594	switch (*op) {
1595	case IP_SET_OP_VERSION: {
1596		struct ip_set_req_version *req_version = data;
1597
1598		if (*len != sizeof(struct ip_set_req_version)) {
1599			ret = -EINVAL;
1600			goto done;
1601		}
1602
1603		req_version->version = IPSET_PROTOCOL;
1604		ret = copy_to_user(user, req_version,
1605				   sizeof(struct ip_set_req_version));
1606		goto done;
1607	}
1608	case IP_SET_OP_GET_BYNAME: {
1609		struct ip_set_req_get_set *req_get = data;
1610
1611		if (*len != sizeof(struct ip_set_req_get_set)) {
1612			ret = -EINVAL;
1613			goto done;
1614		}
1615		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
1616		nfnl_lock();
1617		req_get->set.index = find_set_id(req_get->set.name);
1618		nfnl_unlock();
1619		goto copy;
1620	}
1621	case IP_SET_OP_GET_BYINDEX: {
1622		struct ip_set_req_get_set *req_get = data;
1623
1624		if (*len != sizeof(struct ip_set_req_get_set) ||
1625		    req_get->set.index >= ip_set_max) {
1626			ret = -EINVAL;
1627			goto done;
1628		}
1629		nfnl_lock();
1630		strncpy(req_get->set.name,
1631			ip_set_list[req_get->set.index]
1632				? ip_set_list[req_get->set.index]->name : "",
1633			IPSET_MAXNAMELEN);
1634		nfnl_unlock();
1635		goto copy;
1636	}
1637	default:
1638		ret = -EBADMSG;
1639		goto done;
1640	}	/* end of switch(op) */
1641
1642copy:
1643	ret = copy_to_user(user, data, copylen);
1644
1645done:
1646	vfree(data);
1647	if (ret > 0)
1648		ret = 0;
1649	return ret;
1650}
1651
1652static struct nf_sockopt_ops so_set __read_mostly = {
1653	.pf		= PF_INET,
1654	.get_optmin	= SO_IP_SET,
1655	.get_optmax	= SO_IP_SET + 1,
1656	.get		= &ip_set_sockfn_get,
1657	.owner		= THIS_MODULE,
1658};
1659
1660static int __init
1661ip_set_init(void)
1662{
1663	int ret;
1664
1665	if (max_sets)
1666		ip_set_max = max_sets;
1667	if (ip_set_max >= IPSET_INVALID_ID)
1668		ip_set_max = IPSET_INVALID_ID - 1;
1669
1670	ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max,
1671			      GFP_KERNEL);
1672	if (!ip_set_list) {
1673		pr_err("ip_set: Unable to create ip_set_list\n");
1674		return -ENOMEM;
1675	}
1676
1677	ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
1678	if (ret != 0) {
1679		pr_err("ip_set: cannot register with nfnetlink.\n");
1680		kfree(ip_set_list);
1681		return ret;
1682	}
1683	ret = nf_register_sockopt(&so_set);
1684	if (ret != 0) {
1685		pr_err("SO_SET registry failed: %d\n", ret);
1686		nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1687		kfree(ip_set_list);
1688		return ret;
1689	}
1690
1691	pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
1692	return 0;
1693}
1694
1695static void __exit
1696ip_set_fini(void)
1697{
1698	/* There can't be any existing set */
1699	nf_unregister_sockopt(&so_set);
1700	nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1701	kfree(ip_set_list);
1702	pr_debug("these are the famous last words\n");
1703}
1704
1705module_init(ip_set_init);
1706module_exit(ip_set_fini);
1707