ip_set_core.c revision 8da560ced56c423cd6d35803cd0244c944c676bd
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 *                         Patrick Schaaf <bof@bof.de>
3 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10/* Kernel module for IP set management */
11
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/moduleparam.h>
15#include <linux/ip.h>
16#include <linux/skbuff.h>
17#include <linux/spinlock.h>
18#include <linux/netlink.h>
19#include <linux/rculist.h>
20#include <linux/version.h>
21#include <net/netlink.h>
22
23#include <linux/netfilter.h>
24#include <linux/netfilter/nfnetlink.h>
25#include <linux/netfilter/ipset/ip_set.h>
26
static LIST_HEAD(ip_set_type_list);		/* all registered set types */
static DEFINE_MUTEX(ip_set_type_mutex);		/* protects ip_set_type_list */

/* Table of set pointers indexed by set id; an unused slot holds NULL. */
static struct ip_set **ip_set_list;		/* all individual sets */
static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */

/* Name equality: compares at most IPSET_MAXNAMELEN characters. */
#define STREQ(a, b)	(strncmp(a, b, IPSET_MAXNAMELEN) == 0)

/*
 * Module parameter overriding the number of sets.
 * NOTE(review): the variable is unsigned int but is registered with the
 * "int" parameter type - confirm this is intended.
 */
static unsigned int max_sets;

module_param(max_sets, int, 0600);
MODULE_PARM_DESC(max_sets, "maximal number of sets");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
MODULE_DESCRIPTION("core IP set support");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
43
44/*
45 * The set types are implemented in modules and registered set types
46 * can be found in ip_set_type_list. Adding/deleting types is
47 * serialized by ip_set_type_mutex.
48 */
49
50static inline void
51ip_set_type_lock(void)
52{
53	mutex_lock(&ip_set_type_mutex);
54}
55
56static inline void
57ip_set_type_unlock(void)
58{
59	mutex_unlock(&ip_set_type_mutex);
60}
61
62/* Register and deregister settype */
63
64static struct ip_set_type *
65find_set_type(const char *name, u8 family, u8 revision)
66{
67	struct ip_set_type *type;
68
69	list_for_each_entry_rcu(type, &ip_set_type_list, list)
70		if (STREQ(type->name, name) &&
71		    (type->family == family || type->family == AF_UNSPEC) &&
72		    type->revision == revision)
73			return type;
74	return NULL;
75}
76
77/* Unlock, try to load a set type module and lock again */
78static int
79try_to_load_type(const char *name)
80{
81	nfnl_unlock();
82	pr_debug("try to load ip_set_%s\n", name);
83	if (request_module("ip_set_%s", name) < 0) {
84		pr_warning("Can't find ip_set type %s\n", name);
85		nfnl_lock();
86		return -IPSET_ERR_FIND_TYPE;
87	}
88	nfnl_lock();
89	return -EAGAIN;
90}
91
92/* Find a set type and reference it */
93static int
94find_set_type_get(const char *name, u8 family, u8 revision,
95		  struct ip_set_type **found)
96{
97	rcu_read_lock();
98	*found = find_set_type(name, family, revision);
99	if (*found) {
100		int err = !try_module_get((*found)->me);
101		rcu_read_unlock();
102		return err ? -EFAULT : 0;
103	}
104	rcu_read_unlock();
105
106	return try_to_load_type(name);
107}
108
109/* Find a given set type by name and family.
110 * If we succeeded, the supported minimal and maximum revisions are
111 * filled out.
112 */
113static int
114find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max)
115{
116	struct ip_set_type *type;
117	bool found = false;
118
119	*min = *max = 0;
120	rcu_read_lock();
121	list_for_each_entry_rcu(type, &ip_set_type_list, list)
122		if (STREQ(type->name, name) &&
123		    (type->family == family || type->family == AF_UNSPEC)) {
124			found = true;
125			if (type->revision < *min)
126				*min = type->revision;
127			else if (type->revision > *max)
128				*max = type->revision;
129		}
130	rcu_read_unlock();
131	if (found)
132		return 0;
133
134	return try_to_load_type(name);
135}
136
/* Printable family: "inet" for AF_INET, "inet6" for AF_INET6, else "any". */
#define family_name(f)	((f) == AF_INET ? "inet" : \
			 (f) == AF_INET6 ? "inet6" : "any")
139
140/* Register a set type structure. The type is identified by
141 * the unique triple of name, family and revision.
142 */
143int
144ip_set_type_register(struct ip_set_type *type)
145{
146	int ret = 0;
147
148	if (type->protocol != IPSET_PROTOCOL) {
149		pr_warning("ip_set type %s, family %s, revision %u uses "
150			   "wrong protocol version %u (want %u)\n",
151			   type->name, family_name(type->family),
152			   type->revision, type->protocol, IPSET_PROTOCOL);
153		return -EINVAL;
154	}
155
156	ip_set_type_lock();
157	if (find_set_type(type->name, type->family, type->revision)) {
158		/* Duplicate! */
159		pr_warning("ip_set type %s, family %s, revision %u "
160			   "already registered!\n", type->name,
161			   family_name(type->family), type->revision);
162		ret = -EINVAL;
163		goto unlock;
164	}
165	list_add_rcu(&type->list, &ip_set_type_list);
166	pr_debug("type %s, family %s, revision %u registered.\n",
167		 type->name, family_name(type->family), type->revision);
168unlock:
169	ip_set_type_unlock();
170	return ret;
171}
172EXPORT_SYMBOL_GPL(ip_set_type_register);
173
174/* Unregister a set type. There's a small race with ip_set_create */
175void
176ip_set_type_unregister(struct ip_set_type *type)
177{
178	ip_set_type_lock();
179	if (!find_set_type(type->name, type->family, type->revision)) {
180		pr_warning("ip_set type %s, family %s, revision %u "
181			   "not registered\n", type->name,
182			   family_name(type->family), type->revision);
183		goto unlock;
184	}
185	list_del_rcu(&type->list);
186	pr_debug("type %s, family %s, revision %u unregistered.\n",
187		 type->name, family_name(type->family), type->revision);
188unlock:
189	ip_set_type_unlock();
190
191	synchronize_rcu();
192}
193EXPORT_SYMBOL_GPL(ip_set_type_unregister);
194
195/* Utility functions */
196void *
197ip_set_alloc(size_t size)
198{
199	void *members = NULL;
200
201	if (size < KMALLOC_MAX_SIZE)
202		members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
203
204	if (members) {
205		pr_debug("%p: allocated with kmalloc\n", members);
206		return members;
207	}
208
209	members = vzalloc(size);
210	if (!members)
211		return NULL;
212	pr_debug("%p: allocated with vmalloc\n", members);
213
214	return members;
215}
216EXPORT_SYMBOL_GPL(ip_set_alloc);
217
218void
219ip_set_free(void *members)
220{
221	pr_debug("%p: free with %s\n", members,
222		 is_vmalloc_addr(members) ? "vfree" : "kfree");
223	if (is_vmalloc_addr(members))
224		vfree(members);
225	else
226		kfree(members);
227}
228EXPORT_SYMBOL_GPL(ip_set_free);
229
230static inline bool
231flag_nested(const struct nlattr *nla)
232{
233	return nla->nla_type & NLA_F_NESTED;
234}
235
/*
 * Validation policy for the nested IP address attribute: an IPv4
 * address is a 32-bit value, an IPv6 address is binary data of at most
 * sizeof(struct in6_addr) bytes.
 */
static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
	[IPSET_ATTR_IPADDR_IPV4]	= { .type = NLA_U32 },
	[IPSET_ATTR_IPADDR_IPV6]	= { .type = NLA_BINARY,
					    .len = sizeof(struct in6_addr) },
};
241
242int
243ip_set_get_ipaddr4(struct nlattr *nla,  __be32 *ipaddr)
244{
245	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
246
247	if (unlikely(!flag_nested(nla)))
248		return -IPSET_ERR_PROTOCOL;
249	if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
250		return -IPSET_ERR_PROTOCOL;
251	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
252		return -IPSET_ERR_PROTOCOL;
253
254	*ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
255	return 0;
256}
257EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
258
259int
260ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
261{
262	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
263
264	if (unlikely(!flag_nested(nla)))
265		return -IPSET_ERR_PROTOCOL;
266
267	if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
268		return -IPSET_ERR_PROTOCOL;
269	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
270		return -IPSET_ERR_PROTOCOL;
271
272	memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
273		sizeof(struct in6_addr));
274	return 0;
275}
276EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
277
278/*
279 * Creating/destroying/renaming/swapping affect the existence and
280 * the properties of a set. All of these can be executed from userspace
281 * only and serialized by the nfnl mutex indirectly from nfnetlink.
282 *
283 * Sets are identified by their index in ip_set_list and the index
284 * is used by the external references (set/SET netfilter modules).
285 *
286 * The set behind an index may change by swapping only, from userspace.
287 */
288
289static inline void
290__ip_set_get(ip_set_id_t index)
291{
292	atomic_inc(&ip_set_list[index]->ref);
293}
294
295static inline void
296__ip_set_put(ip_set_id_t index)
297{
298	atomic_dec(&ip_set_list[index]->ref);
299}
300
301/*
302 * Add, del and test set entries from kernel.
303 *
304 * The set behind the index must exist and must be referenced
305 * so it can't be destroyed (or changed) under our foot.
306 */
307
308int
309ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
310	    u8 family, u8 dim, u8 flags)
311{
312	struct ip_set *set = ip_set_list[index];
313	int ret = 0;
314
315	BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
316	pr_debug("set %s, index %u\n", set->name, index);
317
318	if (dim < set->type->dimension ||
319	    !(family == set->family || set->family == AF_UNSPEC))
320		return 0;
321
322	read_lock_bh(&set->lock);
323	ret = set->variant->kadt(set, skb, IPSET_TEST, family, dim, flags);
324	read_unlock_bh(&set->lock);
325
326	if (ret == -EAGAIN) {
327		/* Type requests element to be completed */
328		pr_debug("element must be competed, ADD is triggered\n");
329		write_lock_bh(&set->lock);
330		set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags);
331		write_unlock_bh(&set->lock);
332		ret = 1;
333	}
334
335	/* Convert error codes to nomatch */
336	return (ret < 0 ? 0 : ret);
337}
338EXPORT_SYMBOL_GPL(ip_set_test);
339
340int
341ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
342	   u8 family, u8 dim, u8 flags)
343{
344	struct ip_set *set = ip_set_list[index];
345	int ret;
346
347	BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
348	pr_debug("set %s, index %u\n", set->name, index);
349
350	if (dim < set->type->dimension ||
351	    !(family == set->family || set->family == AF_UNSPEC))
352		return 0;
353
354	write_lock_bh(&set->lock);
355	ret = set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags);
356	write_unlock_bh(&set->lock);
357
358	return ret;
359}
360EXPORT_SYMBOL_GPL(ip_set_add);
361
362int
363ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
364	   u8 family, u8 dim, u8 flags)
365{
366	struct ip_set *set = ip_set_list[index];
367	int ret = 0;
368
369	BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
370	pr_debug("set %s, index %u\n", set->name, index);
371
372	if (dim < set->type->dimension ||
373	    !(family == set->family || set->family == AF_UNSPEC))
374		return 0;
375
376	write_lock_bh(&set->lock);
377	ret = set->variant->kadt(set, skb, IPSET_DEL, family, dim, flags);
378	write_unlock_bh(&set->lock);
379
380	return ret;
381}
382EXPORT_SYMBOL_GPL(ip_set_del);
383
384/*
385 * Find set by name, reference it once. The reference makes sure the
386 * thing pointed to, does not go away under our feet.
387 *
388 * The nfnl mutex must already be activated.
389 */
390ip_set_id_t
391ip_set_get_byname(const char *name, struct ip_set **set)
392{
393	ip_set_id_t i, index = IPSET_INVALID_ID;
394	struct ip_set *s;
395
396	for (i = 0; i < ip_set_max; i++) {
397		s = ip_set_list[i];
398		if (s != NULL && STREQ(s->name, name)) {
399			__ip_set_get(i);
400			index = i;
401			*set = s;
402		}
403	}
404
405	return index;
406}
407EXPORT_SYMBOL_GPL(ip_set_get_byname);
408
409/*
410 * If the given set pointer points to a valid set, decrement
411 * reference count by 1. The caller shall not assume the index
412 * to be valid, after calling this function.
413 *
414 * The nfnl mutex must already be activated.
415 */
416void
417ip_set_put_byindex(ip_set_id_t index)
418{
419	if (ip_set_list[index] != NULL) {
420		BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0);
421		__ip_set_put(index);
422	}
423}
424EXPORT_SYMBOL_GPL(ip_set_put_byindex);
425
426/*
427 * Get the name of a set behind a set index.
428 * We assume the set is referenced, so it does exist and
429 * can't be destroyed. The set cannot be renamed due to
430 * the referencing either.
431 *
432 * The nfnl mutex must already be activated.
433 */
434const char *
435ip_set_name_byindex(ip_set_id_t index)
436{
437	const struct ip_set *set = ip_set_list[index];
438
439	BUG_ON(set == NULL);
440	BUG_ON(atomic_read(&set->ref) == 0);
441
442	/* Referenced, so it's safe */
443	return set->name;
444}
445EXPORT_SYMBOL_GPL(ip_set_name_byindex);
446
447/*
448 * Routines to call by external subsystems, which do not
449 * call nfnl_lock for us.
450 */
451
452/*
453 * Find set by name, reference it once. The reference makes sure the
454 * thing pointed to, does not go away under our feet.
455 *
456 * The nfnl mutex is used in the function.
457 */
458ip_set_id_t
459ip_set_nfnl_get(const char *name)
460{
461	struct ip_set *s;
462	ip_set_id_t index;
463
464	nfnl_lock();
465	index = ip_set_get_byname(name, &s);
466	nfnl_unlock();
467
468	return index;
469}
470EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
471
472/*
473 * Find set by index, reference it once. The reference makes sure the
474 * thing pointed to, does not go away under our feet.
475 *
476 * The nfnl mutex is used in the function.
477 */
478ip_set_id_t
479ip_set_nfnl_get_byindex(ip_set_id_t index)
480{
481	if (index > ip_set_max)
482		return IPSET_INVALID_ID;
483
484	nfnl_lock();
485	if (ip_set_list[index])
486		__ip_set_get(index);
487	else
488		index = IPSET_INVALID_ID;
489	nfnl_unlock();
490
491	return index;
492}
493EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
494
495/*
496 * If the given set pointer points to a valid set, decrement
497 * reference count by 1. The caller shall not assume the index
498 * to be valid, after calling this function.
499 *
500 * The nfnl mutex is used in the function.
501 */
502void
503ip_set_nfnl_put(ip_set_id_t index)
504{
505	nfnl_lock();
506	if (ip_set_list[index] != NULL) {
507		BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0);
508		__ip_set_put(index);
509	}
510	nfnl_unlock();
511}
512EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
513
514/*
515 * Communication protocol with userspace over netlink.
516 *
 * We are already locked by nfnl_lock.
518 */
519
520static inline bool
521protocol_failed(const struct nlattr * const tb[])
522{
523	return !tb[IPSET_ATTR_PROTOCOL] ||
524	       nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
525}
526
527static inline u32
528flag_exist(const struct nlmsghdr *nlh)
529{
530	return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
531}
532
533static struct nlmsghdr *
534start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags,
535	  enum ipset_cmd cmd)
536{
537	struct nlmsghdr *nlh;
538	struct nfgenmsg *nfmsg;
539
540	nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
541			sizeof(*nfmsg), flags);
542	if (nlh == NULL)
543		return NULL;
544
545	nfmsg = nlmsg_data(nlh);
546	nfmsg->nfgen_family = AF_INET;
547	nfmsg->version = NFNETLINK_V0;
548	nfmsg->res_id = 0;
549
550	return nlh;
551}
552
553/* Create a set */
554
/*
 * Attribute policy for the create command: protocol, revision and
 * family are 8-bit values, set and type names are NUL-terminated
 * strings of at most IPSET_MAXNAMELEN - 1 characters, and the type
 * specific data is a nested attribute.
 */
static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
	[IPSET_ATTR_TYPENAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1},
	[IPSET_ATTR_REVISION]	= { .type = NLA_U8 },
	[IPSET_ATTR_FAMILY]	= { .type = NLA_U8 },
	[IPSET_ATTR_DATA]	= { .type = NLA_NESTED },
};
565
566static ip_set_id_t
567find_set_id(const char *name)
568{
569	ip_set_id_t i, index = IPSET_INVALID_ID;
570	const struct ip_set *set;
571
572	for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) {
573		set = ip_set_list[i];
574		if (set != NULL && STREQ(set->name, name))
575			index = i;
576	}
577	return index;
578}
579
580static inline struct ip_set *
581find_set(const char *name)
582{
583	ip_set_id_t index = find_set_id(name);
584
585	return index == IPSET_INVALID_ID ? NULL : ip_set_list[index];
586}
587
588static int
589find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
590{
591	ip_set_id_t i;
592
593	*index = IPSET_INVALID_ID;
594	for (i = 0;  i < ip_set_max; i++) {
595		if (ip_set_list[i] == NULL) {
596			if (*index == IPSET_INVALID_ID)
597				*index = i;
598		} else if (STREQ(name, ip_set_list[i]->name)) {
599			/* Name clash */
600			*set = ip_set_list[i];
601			return -EEXIST;
602		}
603	}
604	if (*index == IPSET_INVALID_ID)
605		/* No free slot remained */
606		return -IPSET_ERR_MAX_SETS;
607	return 0;
608}
609
/*
 * Netlink handler for the create command.
 *
 * Allocates the generic set structure, takes a reference on the
 * requested set type, lets the type build its private part and finally
 * stores the set in the first free slot of ip_set_list.
 *
 * Returns 0 on success (also when an identical set already exists and
 * IPSET_FLAG_EXIST is in effect), -IPSET_ERR_PROTOCOL for malformed
 * requests, -ENOMEM, or the error reported by the type lookup, the
 * type's create() callback or find_free_id().
 */
static int
ip_set_create(struct sock *ctnl, struct sk_buff *skb,
	      const struct nlmsghdr *nlh,
	      const struct nlattr * const attr[])
{
	struct ip_set *set, *clash;
	ip_set_id_t index = IPSET_INVALID_ID;
	struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
	const char *name, *typename;
	u8 family, revision;
	u32 flags = flag_exist(nlh);
	int ret = 0;

	/* All identifying attributes are mandatory; data is optional
	 * but must be flagged as nested when present. */
	if (unlikely(protocol_failed(attr) ||
		     attr[IPSET_ATTR_SETNAME] == NULL ||
		     attr[IPSET_ATTR_TYPENAME] == NULL ||
		     attr[IPSET_ATTR_REVISION] == NULL ||
		     attr[IPSET_ATTR_FAMILY] == NULL ||
		     (attr[IPSET_ATTR_DATA] != NULL &&
		      !flag_nested(attr[IPSET_ATTR_DATA]))))
		return -IPSET_ERR_PROTOCOL;

	name = nla_data(attr[IPSET_ATTR_SETNAME]);
	typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
	family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
	revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
	pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
		 name, typename, family_name(family), revision);

	/*
	 * First, and without any locks, allocate and initialize
	 * a normal base set structure.
	 */
	set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
	if (!set)
		return -ENOMEM;
	rwlock_init(&set->lock);
	strlcpy(set->name, name, IPSET_MAXNAMELEN);
	atomic_set(&set->ref, 0);
	set->family = family;

	/*
	 * Next, check that we know the type, and take
	 * a reference on the type, to make sure it stays available
	 * while constructing our new set.
	 *
	 * After referencing the type, we try to create the type
	 * specific part of the set without holding any locks.
	 */
	ret = find_set_type_get(typename, family, revision, &(set->type));
	if (ret)
		goto out;

	/*
	 * Without holding any locks, create private part.
	 * tb stays all-NULL when no data attribute was sent.
	 */
	if (attr[IPSET_ATTR_DATA] &&
	    nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
			     set->type->create_policy)) {
		ret = -IPSET_ERR_PROTOCOL;
		goto put_out;
	}

	ret = set->type->create(set, tb, flags);
	if (ret != 0)
		goto put_out;

	/* BTW, ret==0 here. */

	/*
	 * Here, we have a valid, constructed set and we are protected
	 * by nfnl_lock. Find the first free index in ip_set_list and
	 * check clashing.
	 */
	if ((ret = find_free_id(set->name, &index, &clash)) != 0) {
		/* If this is the same set and requested, ignore error.
		 * clash is only valid for -EEXIST, which is tested first. */
		if (ret == -EEXIST &&
		    (flags & IPSET_FLAG_EXIST) &&
		    STREQ(set->type->name, clash->type->name) &&
		    set->type->family == clash->type->family &&
		    set->type->revision == clash->type->revision &&
		    set->variant->same_set(set, clash))
			ret = 0;
		/* Either way the freshly built set is discarded */
		goto cleanup;
	}

	/*
	 * Finally! Add our shiny new set to the list, and be done.
	 */
	pr_debug("create: '%s' created with index %u!\n", set->name, index);
	ip_set_list[index] = set;

	return ret;

	/* Unwind in reverse order of construction */
cleanup:
	set->variant->destroy(set);
put_out:
	module_put(set->type->me);
out:
	kfree(set);
	return ret;
}
712
713/* Destroy sets */
714
/*
 * Attribute policy for commands that take a protocol version and a
 * set name (a NUL-terminated string of at most IPSET_MAXNAMELEN - 1
 * characters).
 */
static const struct nla_policy
ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
};
721
722static void
723ip_set_destroy_set(ip_set_id_t index)
724{
725	struct ip_set *set = ip_set_list[index];
726
727	pr_debug("set: %s\n",  set->name);
728	ip_set_list[index] = NULL;
729
730	/* Must call it without holding any lock */
731	set->variant->destroy(set);
732	module_put(set->type->me);
733	kfree(set);
734}
735
736static int
737ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
738	       const struct nlmsghdr *nlh,
739	       const struct nlattr * const attr[])
740{
741	ip_set_id_t i;
742
743	if (unlikely(protocol_failed(attr)))
744		return -IPSET_ERR_PROTOCOL;
745
746	/* References are protected by the nfnl mutex */
747	if (!attr[IPSET_ATTR_SETNAME]) {
748		for (i = 0; i < ip_set_max; i++) {
749			if (ip_set_list[i] != NULL &&
750			    (atomic_read(&ip_set_list[i]->ref)))
751				return -IPSET_ERR_BUSY;
752		}
753		for (i = 0; i < ip_set_max; i++) {
754			if (ip_set_list[i] != NULL)
755				ip_set_destroy_set(i);
756		}
757	} else {
758		i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
759		if (i == IPSET_INVALID_ID)
760			return -ENOENT;
761		else if (atomic_read(&ip_set_list[i]->ref))
762			return -IPSET_ERR_BUSY;
763
764		ip_set_destroy_set(i);
765	}
766	return 0;
767}
768
769/* Flush sets */
770
771static void
772ip_set_flush_set(struct ip_set *set)
773{
774	pr_debug("set: %s\n",  set->name);
775
776	write_lock_bh(&set->lock);
777	set->variant->flush(set);
778	write_unlock_bh(&set->lock);
779}
780
781static int
782ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
783	     const struct nlmsghdr *nlh,
784	     const struct nlattr * const attr[])
785{
786	ip_set_id_t i;
787
788	if (unlikely(protocol_failed(attr)))
789		return -EPROTO;
790
791	if (!attr[IPSET_ATTR_SETNAME]) {
792		for (i = 0; i < ip_set_max; i++)
793			if (ip_set_list[i] != NULL)
794				ip_set_flush_set(ip_set_list[i]);
795	} else {
796		i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
797		if (i == IPSET_INVALID_ID)
798			return -ENOENT;
799
800		ip_set_flush_set(ip_set_list[i]);
801	}
802
803	return 0;
804}
805
806/* Rename a set */
807
/*
 * Attribute policy for commands that take a protocol version and two
 * set names (each a NUL-terminated string of at most
 * IPSET_MAXNAMELEN - 1 characters).
 */
static const struct nla_policy
ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
	[IPSET_ATTR_SETNAME2]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
};
816
817static int
818ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
819	      const struct nlmsghdr *nlh,
820	      const struct nlattr * const attr[])
821{
822	struct ip_set *set;
823	const char *name2;
824	ip_set_id_t i;
825
826	if (unlikely(protocol_failed(attr) ||
827		     attr[IPSET_ATTR_SETNAME] == NULL ||
828		     attr[IPSET_ATTR_SETNAME2] == NULL))
829		return -IPSET_ERR_PROTOCOL;
830
831	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
832	if (set == NULL)
833		return -ENOENT;
834	if (atomic_read(&set->ref) != 0)
835		return -IPSET_ERR_REFERENCED;
836
837	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
838	for (i = 0; i < ip_set_max; i++) {
839		if (ip_set_list[i] != NULL &&
840		    STREQ(ip_set_list[i]->name, name2))
841			return -IPSET_ERR_EXIST_SETNAME2;
842	}
843	strncpy(set->name, name2, IPSET_MAXNAMELEN);
844
845	return 0;
846}
847
848/* Swap two sets so that name/index points to the other.
849 * References and set names are also swapped.
850 *
851 * We are protected by the nfnl mutex and references are
852 * manipulated only by holding the mutex. The kernel interfaces
853 * do not hold the mutex but the pointer settings are atomic
854 * so the ip_set_list always contains valid pointers to the sets.
855 */
856
857static int
858ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
859	    const struct nlmsghdr *nlh,
860	    const struct nlattr * const attr[])
861{
862	struct ip_set *from, *to;
863	ip_set_id_t from_id, to_id;
864	char from_name[IPSET_MAXNAMELEN];
865	u32 from_ref;
866
867	if (unlikely(protocol_failed(attr) ||
868		     attr[IPSET_ATTR_SETNAME] == NULL ||
869		     attr[IPSET_ATTR_SETNAME2] == NULL))
870		return -IPSET_ERR_PROTOCOL;
871
872	from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
873	if (from_id == IPSET_INVALID_ID)
874		return -ENOENT;
875
876	to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2]));
877	if (to_id == IPSET_INVALID_ID)
878		return -IPSET_ERR_EXIST_SETNAME2;
879
880	from = ip_set_list[from_id];
881	to = ip_set_list[to_id];
882
883	/* Features must not change.
884	 * Not an artifical restriction anymore, as we must prevent
885	 * possible loops created by swapping in setlist type of sets. */
886	if (!(from->type->features == to->type->features &&
887	      from->type->family == to->type->family))
888		return -IPSET_ERR_TYPE_MISMATCH;
889
890	/* No magic here: ref munging protected by the nfnl_lock */
891	strncpy(from_name, from->name, IPSET_MAXNAMELEN);
892	from_ref = atomic_read(&from->ref);
893
894	strncpy(from->name, to->name, IPSET_MAXNAMELEN);
895	atomic_set(&from->ref, atomic_read(&to->ref));
896	strncpy(to->name, from_name, IPSET_MAXNAMELEN);
897	atomic_set(&to->ref, from_ref);
898
899	ip_set_list[from_id] = to;
900	ip_set_list[to_id] = from;
901
902	return 0;
903}
904
905/* List/save set data */
906
/* Dump state kept in cb->args[0] of the netlink dump callback */
#define DUMP_INIT	0L	/* request not parsed yet */
#define DUMP_ALL	1L	/* all sets, except those dumped last */
#define DUMP_ONE	2L	/* a single, named set */
#define DUMP_LAST	3L	/* sets whose type has IPSET_DUMP_LAST */
911
912static int
913ip_set_dump_done(struct netlink_callback *cb)
914{
915	if (cb->args[2]) {
916		pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name);
917		__ip_set_put((ip_set_id_t) cb->args[1]);
918	}
919	return 0;
920}
921
922static inline void
923dump_attrs(struct nlmsghdr *nlh)
924{
925	const struct nlattr *attr;
926	int rem;
927
928	pr_debug("dump nlmsg\n");
929	nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
930		pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
931	}
932}
933
934static int
935dump_init(struct netlink_callback *cb)
936{
937	struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
938	int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
939	struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
940	struct nlattr *attr = (void *)nlh + min_len;
941	ip_set_id_t index;
942
943	/* Second pass, so parser can't fail */
944	nla_parse(cda, IPSET_ATTR_CMD_MAX,
945		  attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
946
947	/* cb->args[0] : dump single set/all sets
948	 *         [1] : set index
949	 *         [..]: type specific
950	 */
951
952	if (!cda[IPSET_ATTR_SETNAME]) {
953		cb->args[0] = DUMP_ALL;
954		return 0;
955	}
956
957	index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME]));
958	if (index == IPSET_INVALID_ID)
959		return -ENOENT;
960
961	cb->args[0] = DUMP_ONE;
962	cb->args[1] = index;
963	return 0;
964}
965
966static int
967ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
968{
969	ip_set_id_t index = IPSET_INVALID_ID, max;
970	struct ip_set *set = NULL;
971	struct nlmsghdr *nlh = NULL;
972	unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0;
973	int ret = 0;
974
975	if (cb->args[0] == DUMP_INIT) {
976		ret = dump_init(cb);
977		if (ret < 0) {
978			nlh = nlmsg_hdr(cb->skb);
979			/* We have to create and send the error message
980			 * manually :-( */
981			if (nlh->nlmsg_flags & NLM_F_ACK)
982				netlink_ack(cb->skb, nlh, ret);
983			return ret;
984		}
985	}
986
987	if (cb->args[1] >= ip_set_max)
988		goto out;
989
990	pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]);
991	max = cb->args[0] == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
992	for (; cb->args[1] < max; cb->args[1]++) {
993		index = (ip_set_id_t) cb->args[1];
994		set = ip_set_list[index];
995		if (set == NULL) {
996			if (cb->args[0] == DUMP_ONE) {
997				ret = -ENOENT;
998				goto out;
999			}
1000			continue;
1001		}
1002		/* When dumping all sets, we must dump "sorted"
1003		 * so that lists (unions of sets) are dumped last.
1004		 */
1005		if (cb->args[0] != DUMP_ONE &&
1006		    !((cb->args[0] == DUMP_ALL) ^
1007		      (set->type->features & IPSET_DUMP_LAST)))
1008			continue;
1009		pr_debug("List set: %s\n", set->name);
1010		if (!cb->args[2]) {
1011			/* Start listing: make sure set won't be destroyed */
1012			pr_debug("reference set\n");
1013			__ip_set_get(index);
1014		}
1015		nlh = start_msg(skb, NETLINK_CB(cb->skb).pid,
1016				cb->nlh->nlmsg_seq, flags,
1017				IPSET_CMD_LIST);
1018		if (!nlh) {
1019			ret = -EMSGSIZE;
1020			goto release_refcount;
1021		}
1022		NLA_PUT_U8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1023		NLA_PUT_STRING(skb, IPSET_ATTR_SETNAME, set->name);
1024		switch (cb->args[2]) {
1025		case 0:
1026			/* Core header data */
1027			NLA_PUT_STRING(skb, IPSET_ATTR_TYPENAME,
1028				       set->type->name);
1029			NLA_PUT_U8(skb, IPSET_ATTR_FAMILY,
1030				   set->family);
1031			NLA_PUT_U8(skb, IPSET_ATTR_REVISION,
1032				   set->type->revision);
1033			ret = set->variant->head(set, skb);
1034			if (ret < 0)
1035				goto release_refcount;
1036			/* Fall through and add elements */
1037		default:
1038			read_lock_bh(&set->lock);
1039			ret = set->variant->list(set, skb, cb);
1040			read_unlock_bh(&set->lock);
1041			if (!cb->args[2]) {
1042				/* Set is done, proceed with next one */
1043				if (cb->args[0] == DUMP_ONE)
1044					cb->args[1] = IPSET_INVALID_ID;
1045				else
1046					cb->args[1]++;
1047			}
1048			goto release_refcount;
1049		}
1050	}
1051	goto out;
1052
1053nla_put_failure:
1054	ret = -EFAULT;
1055release_refcount:
1056	/* If there was an error or set is done, release set */
1057	if (ret || !cb->args[2]) {
1058		pr_debug("release set %s\n", ip_set_list[index]->name);
1059		__ip_set_put(index);
1060	}
1061
1062	/* If we dump all sets, continue with dumping last ones */
1063	if (cb->args[0] == DUMP_ALL && cb->args[1] >= max && !cb->args[2])
1064		cb->args[0] = DUMP_LAST;
1065
1066out:
1067	if (nlh) {
1068		nlmsg_end(skb, nlh);
1069		pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
1070		dump_attrs(nlh);
1071	}
1072
1073	return ret < 0 ? ret : skb->len;
1074}
1075
1076static int
1077ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
1078	    const struct nlmsghdr *nlh,
1079	    const struct nlattr * const attr[])
1080{
1081	if (unlikely(protocol_failed(attr)))
1082		return -IPSET_ERR_PROTOCOL;
1083
1084	return netlink_dump_start(ctnl, skb, nlh,
1085				  ip_set_dump_start,
1086				  ip_set_dump_done);
1087}
1088
1089/* Add, del and test */
1090
/*
 * Attribute policy for the add/del/test commands: protocol version,
 * set name, an optional 32-bit line number and the element data,
 * either as a single nested DATA attribute or as a nested ADT
 * attribute.
 */
static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
				    .len = IPSET_MAXNAMELEN - 1 },
	[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
	[IPSET_ATTR_DATA]	= { .type = NLA_NESTED },
	[IPSET_ATTR_ADT]	= { .type = NLA_NESTED },
};
1099
1100static int
1101call_ad(struct sk_buff *skb, struct ip_set *set,
1102	struct nlattr *tb[], enum ipset_adt adt,
1103	u32 flags, bool use_lineno)
1104{
1105	int ret, retried = 0;
1106	u32 lineno = 0;
1107	bool eexist = flags & IPSET_FLAG_EXIST;
1108
1109	do {
1110		write_lock_bh(&set->lock);
1111		ret = set->variant->uadt(set, tb, adt, &lineno, flags);
1112		write_unlock_bh(&set->lock);
1113	} while (ret == -EAGAIN &&
1114		 set->variant->resize &&
1115		 (ret = set->variant->resize(set, retried++)) == 0);
1116
1117	if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
1118		return 0;
1119	if (lineno && use_lineno) {
1120		/* Error in restore/batch mode: send back lineno */
1121		struct nlmsghdr *nlh = nlmsg_hdr(skb);
1122		int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
1123		struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1124		struct nlattr *cmdattr = (void *)nlh + min_len;
1125		u32 *errline;
1126
1127		nla_parse(cda, IPSET_ATTR_CMD_MAX,
1128			  cmdattr, nlh->nlmsg_len - min_len,
1129			  ip_set_adt_policy);
1130
1131		errline = nla_data(cda[IPSET_ATTR_LINENO]);
1132
1133		*errline = lineno;
1134	}
1135
1136	return ret;
1137}
1138
1139static int
1140ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
1141	    const struct nlmsghdr *nlh,
1142	    const struct nlattr * const attr[])
1143{
1144	struct ip_set *set;
1145	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1146	const struct nlattr *nla;
1147	u32 flags = flag_exist(nlh);
1148	bool use_lineno;
1149	int ret = 0;
1150
1151	if (unlikely(protocol_failed(attr) ||
1152		     attr[IPSET_ATTR_SETNAME] == NULL ||
1153		     !((attr[IPSET_ATTR_DATA] != NULL) ^
1154		       (attr[IPSET_ATTR_ADT] != NULL)) ||
1155		     (attr[IPSET_ATTR_DATA] != NULL &&
1156		      !flag_nested(attr[IPSET_ATTR_DATA])) ||
1157		     (attr[IPSET_ATTR_ADT] != NULL &&
1158		      (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1159		       attr[IPSET_ATTR_LINENO] == NULL))))
1160		return -IPSET_ERR_PROTOCOL;
1161
1162	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1163	if (set == NULL)
1164		return -ENOENT;
1165
1166	use_lineno = !!attr[IPSET_ATTR_LINENO];
1167	if (attr[IPSET_ATTR_DATA]) {
1168		if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1169				     attr[IPSET_ATTR_DATA],
1170				     set->type->adt_policy))
1171			return -IPSET_ERR_PROTOCOL;
1172		ret = call_ad(skb, set, tb, IPSET_ADD, flags, use_lineno);
1173	} else {
1174		int nla_rem;
1175
1176		nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1177			memset(tb, 0, sizeof(tb));
1178			if (nla_type(nla) != IPSET_ATTR_DATA ||
1179			    !flag_nested(nla) ||
1180			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1181					     set->type->adt_policy))
1182				return -IPSET_ERR_PROTOCOL;
1183			ret = call_ad(skb, set, tb, IPSET_ADD,
1184				      flags, use_lineno);
1185			if (ret < 0)
1186				return ret;
1187		}
1188	}
1189	return ret;
1190}
1191
1192static int
1193ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
1194	    const struct nlmsghdr *nlh,
1195	    const struct nlattr * const attr[])
1196{
1197	struct ip_set *set;
1198	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1199	const struct nlattr *nla;
1200	u32 flags = flag_exist(nlh);
1201	bool use_lineno;
1202	int ret = 0;
1203
1204	if (unlikely(protocol_failed(attr) ||
1205		     attr[IPSET_ATTR_SETNAME] == NULL ||
1206		     !((attr[IPSET_ATTR_DATA] != NULL) ^
1207		       (attr[IPSET_ATTR_ADT] != NULL)) ||
1208		     (attr[IPSET_ATTR_DATA] != NULL &&
1209		      !flag_nested(attr[IPSET_ATTR_DATA])) ||
1210		     (attr[IPSET_ATTR_ADT] != NULL &&
1211		      (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1212		       attr[IPSET_ATTR_LINENO] == NULL))))
1213		return -IPSET_ERR_PROTOCOL;
1214
1215	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1216	if (set == NULL)
1217		return -ENOENT;
1218
1219	use_lineno = !!attr[IPSET_ATTR_LINENO];
1220	if (attr[IPSET_ATTR_DATA]) {
1221		if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1222				     attr[IPSET_ATTR_DATA],
1223				     set->type->adt_policy))
1224			return -IPSET_ERR_PROTOCOL;
1225		ret = call_ad(skb, set, tb, IPSET_DEL, flags, use_lineno);
1226	} else {
1227		int nla_rem;
1228
1229		nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1230			memset(tb, 0, sizeof(*tb));
1231			if (nla_type(nla) != IPSET_ATTR_DATA ||
1232			    !flag_nested(nla) ||
1233			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1234					     set->type->adt_policy))
1235				return -IPSET_ERR_PROTOCOL;
1236			ret = call_ad(skb, set, tb, IPSET_DEL,
1237				      flags, use_lineno);
1238			if (ret < 0)
1239				return ret;
1240		}
1241	}
1242	return ret;
1243}
1244
1245static int
1246ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
1247	     const struct nlmsghdr *nlh,
1248	     const struct nlattr * const attr[])
1249{
1250	struct ip_set *set;
1251	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1252	int ret = 0;
1253
1254	if (unlikely(protocol_failed(attr) ||
1255		     attr[IPSET_ATTR_SETNAME] == NULL ||
1256		     attr[IPSET_ATTR_DATA] == NULL ||
1257		     !flag_nested(attr[IPSET_ATTR_DATA])))
1258		return -IPSET_ERR_PROTOCOL;
1259
1260	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1261	if (set == NULL)
1262		return -ENOENT;
1263
1264	if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
1265			     set->type->adt_policy))
1266		return -IPSET_ERR_PROTOCOL;
1267
1268	read_lock_bh(&set->lock);
1269	ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0);
1270	read_unlock_bh(&set->lock);
1271	/* Userspace can't trigger element to be re-added */
1272	if (ret == -EAGAIN)
1273		ret = 1;
1274
1275	return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST;
1276}
1277
/* Get header data of a set */
1279
1280static int
1281ip_set_header(struct sock *ctnl, struct sk_buff *skb,
1282	      const struct nlmsghdr *nlh,
1283	      const struct nlattr * const attr[])
1284{
1285	const struct ip_set *set;
1286	struct sk_buff *skb2;
1287	struct nlmsghdr *nlh2;
1288	ip_set_id_t index;
1289	int ret = 0;
1290
1291	if (unlikely(protocol_failed(attr) ||
1292		     attr[IPSET_ATTR_SETNAME] == NULL))
1293		return -IPSET_ERR_PROTOCOL;
1294
1295	index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
1296	if (index == IPSET_INVALID_ID)
1297		return -ENOENT;
1298	set = ip_set_list[index];
1299
1300	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1301	if (skb2 == NULL)
1302		return -ENOMEM;
1303
1304	nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1305			 IPSET_CMD_HEADER);
1306	if (!nlh2)
1307		goto nlmsg_failure;
1308	NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1309	NLA_PUT_STRING(skb2, IPSET_ATTR_SETNAME, set->name);
1310	NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, set->type->name);
1311	NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->family);
1312	NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->type->revision);
1313	nlmsg_end(skb2, nlh2);
1314
1315	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1316	if (ret < 0)
1317		return ret;
1318
1319	return 0;
1320
1321nla_put_failure:
1322	nlmsg_cancel(skb2, nlh2);
1323nlmsg_failure:
1324	kfree_skb(skb2);
1325	return -EMSGSIZE;
1326}
1327
1328/* Get type data */
1329
1330static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
1331	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1332	[IPSET_ATTR_TYPENAME]	= { .type = NLA_NUL_STRING,
1333				    .len = IPSET_MAXNAMELEN - 1 },
1334	[IPSET_ATTR_FAMILY]	= { .type = NLA_U8 },
1335};
1336
1337static int
1338ip_set_type(struct sock *ctnl, struct sk_buff *skb,
1339	    const struct nlmsghdr *nlh,
1340	    const struct nlattr * const attr[])
1341{
1342	struct sk_buff *skb2;
1343	struct nlmsghdr *nlh2;
1344	u8 family, min, max;
1345	const char *typename;
1346	int ret = 0;
1347
1348	if (unlikely(protocol_failed(attr) ||
1349		     attr[IPSET_ATTR_TYPENAME] == NULL ||
1350		     attr[IPSET_ATTR_FAMILY] == NULL))
1351		return -IPSET_ERR_PROTOCOL;
1352
1353	family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
1354	typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
1355	ret = find_set_type_minmax(typename, family, &min, &max);
1356	if (ret)
1357		return ret;
1358
1359	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1360	if (skb2 == NULL)
1361		return -ENOMEM;
1362
1363	nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1364			 IPSET_CMD_TYPE);
1365	if (!nlh2)
1366		goto nlmsg_failure;
1367	NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1368	NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, typename);
1369	NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, family);
1370	NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, max);
1371	NLA_PUT_U8(skb2, IPSET_ATTR_REVISION_MIN, min);
1372	nlmsg_end(skb2, nlh2);
1373
1374	pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
1375	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1376	if (ret < 0)
1377		return ret;
1378
1379	return 0;
1380
1381nla_put_failure:
1382	nlmsg_cancel(skb2, nlh2);
1383nlmsg_failure:
1384	kfree_skb(skb2);
1385	return -EMSGSIZE;
1386}
1387
1388/* Get protocol version */
1389
1390static const struct nla_policy
1391ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
1392	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
1393};
1394
1395static int
1396ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
1397		const struct nlmsghdr *nlh,
1398		const struct nlattr * const attr[])
1399{
1400	struct sk_buff *skb2;
1401	struct nlmsghdr *nlh2;
1402	int ret = 0;
1403
1404	if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
1405		return -IPSET_ERR_PROTOCOL;
1406
1407	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1408	if (skb2 == NULL)
1409		return -ENOMEM;
1410
1411	nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1412			 IPSET_CMD_PROTOCOL);
1413	if (!nlh2)
1414		goto nlmsg_failure;
1415	NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1416	nlmsg_end(skb2, nlh2);
1417
1418	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1419	if (ret < 0)
1420		return ret;
1421
1422	return 0;
1423
1424nla_put_failure:
1425	nlmsg_cancel(skb2, nlh2);
1426nlmsg_failure:
1427	kfree_skb(skb2);
1428	return -EMSGSIZE;
1429}
1430
1431static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
1432	[IPSET_CMD_CREATE]	= {
1433		.call		= ip_set_create,
1434		.attr_count	= IPSET_ATTR_CMD_MAX,
1435		.policy		= ip_set_create_policy,
1436	},
1437	[IPSET_CMD_DESTROY]	= {
1438		.call		= ip_set_destroy,
1439		.attr_count	= IPSET_ATTR_CMD_MAX,
1440		.policy		= ip_set_setname_policy,
1441	},
1442	[IPSET_CMD_FLUSH]	= {
1443		.call		= ip_set_flush,
1444		.attr_count	= IPSET_ATTR_CMD_MAX,
1445		.policy		= ip_set_setname_policy,
1446	},
1447	[IPSET_CMD_RENAME]	= {
1448		.call		= ip_set_rename,
1449		.attr_count	= IPSET_ATTR_CMD_MAX,
1450		.policy		= ip_set_setname2_policy,
1451	},
1452	[IPSET_CMD_SWAP]	= {
1453		.call		= ip_set_swap,
1454		.attr_count	= IPSET_ATTR_CMD_MAX,
1455		.policy		= ip_set_setname2_policy,
1456	},
1457	[IPSET_CMD_LIST]	= {
1458		.call		= ip_set_dump,
1459		.attr_count	= IPSET_ATTR_CMD_MAX,
1460		.policy		= ip_set_setname_policy,
1461	},
1462	[IPSET_CMD_SAVE]	= {
1463		.call		= ip_set_dump,
1464		.attr_count	= IPSET_ATTR_CMD_MAX,
1465		.policy		= ip_set_setname_policy,
1466	},
1467	[IPSET_CMD_ADD]	= {
1468		.call		= ip_set_uadd,
1469		.attr_count	= IPSET_ATTR_CMD_MAX,
1470		.policy		= ip_set_adt_policy,
1471	},
1472	[IPSET_CMD_DEL]	= {
1473		.call		= ip_set_udel,
1474		.attr_count	= IPSET_ATTR_CMD_MAX,
1475		.policy		= ip_set_adt_policy,
1476	},
1477	[IPSET_CMD_TEST]	= {
1478		.call		= ip_set_utest,
1479		.attr_count	= IPSET_ATTR_CMD_MAX,
1480		.policy		= ip_set_adt_policy,
1481	},
1482	[IPSET_CMD_HEADER]	= {
1483		.call		= ip_set_header,
1484		.attr_count	= IPSET_ATTR_CMD_MAX,
1485		.policy		= ip_set_setname_policy,
1486	},
1487	[IPSET_CMD_TYPE]	= {
1488		.call		= ip_set_type,
1489		.attr_count	= IPSET_ATTR_CMD_MAX,
1490		.policy		= ip_set_type_policy,
1491	},
1492	[IPSET_CMD_PROTOCOL]	= {
1493		.call		= ip_set_protocol,
1494		.attr_count	= IPSET_ATTR_CMD_MAX,
1495		.policy		= ip_set_protocol_policy,
1496	},
1497};
1498
1499static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
1500	.name		= "ip_set",
1501	.subsys_id	= NFNL_SUBSYS_IPSET,
1502	.cb_count	= IPSET_MSG_MAX,
1503	.cb		= ip_set_netlink_subsys_cb,
1504};
1505
1506/* Interface to iptables/ip6tables */
1507
1508static int
1509ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1510{
1511	unsigned *op;
1512	void *data;
1513	int copylen = *len, ret = 0;
1514
1515	if (!capable(CAP_NET_ADMIN))
1516		return -EPERM;
1517	if (optval != SO_IP_SET)
1518		return -EBADF;
1519	if (*len < sizeof(unsigned))
1520		return -EINVAL;
1521
1522	data = vmalloc(*len);
1523	if (!data)
1524		return -ENOMEM;
1525	if (copy_from_user(data, user, *len) != 0) {
1526		ret = -EFAULT;
1527		goto done;
1528	}
1529	op = (unsigned *) data;
1530
1531	if (*op < IP_SET_OP_VERSION) {
1532		/* Check the version at the beginning of operations */
1533		struct ip_set_req_version *req_version = data;
1534		if (req_version->version != IPSET_PROTOCOL) {
1535			ret = -EPROTO;
1536			goto done;
1537		}
1538	}
1539
1540	switch (*op) {
1541	case IP_SET_OP_VERSION: {
1542		struct ip_set_req_version *req_version = data;
1543
1544		if (*len != sizeof(struct ip_set_req_version)) {
1545			ret = -EINVAL;
1546			goto done;
1547		}
1548
1549		req_version->version = IPSET_PROTOCOL;
1550		ret = copy_to_user(user, req_version,
1551				   sizeof(struct ip_set_req_version));
1552		goto done;
1553	}
1554	case IP_SET_OP_GET_BYNAME: {
1555		struct ip_set_req_get_set *req_get = data;
1556
1557		if (*len != sizeof(struct ip_set_req_get_set)) {
1558			ret = -EINVAL;
1559			goto done;
1560		}
1561		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
1562		nfnl_lock();
1563		req_get->set.index = find_set_id(req_get->set.name);
1564		nfnl_unlock();
1565		goto copy;
1566	}
1567	case IP_SET_OP_GET_BYINDEX: {
1568		struct ip_set_req_get_set *req_get = data;
1569
1570		if (*len != sizeof(struct ip_set_req_get_set) ||
1571		    req_get->set.index >= ip_set_max) {
1572			ret = -EINVAL;
1573			goto done;
1574		}
1575		nfnl_lock();
1576		strncpy(req_get->set.name,
1577			ip_set_list[req_get->set.index]
1578				? ip_set_list[req_get->set.index]->name : "",
1579			IPSET_MAXNAMELEN);
1580		nfnl_unlock();
1581		goto copy;
1582	}
1583	default:
1584		ret = -EBADMSG;
1585		goto done;
1586	}	/* end of switch(op) */
1587
1588copy:
1589	ret = copy_to_user(user, data, copylen);
1590
1591done:
1592	vfree(data);
1593	if (ret > 0)
1594		ret = 0;
1595	return ret;
1596}
1597
1598static struct nf_sockopt_ops so_set __read_mostly = {
1599	.pf		= PF_INET,
1600	.get_optmin	= SO_IP_SET,
1601	.get_optmax	= SO_IP_SET + 1,
1602	.get		= &ip_set_sockfn_get,
1603	.owner		= THIS_MODULE,
1604};
1605
1606static int __init
1607ip_set_init(void)
1608{
1609	int ret;
1610
1611	if (max_sets)
1612		ip_set_max = max_sets;
1613	if (ip_set_max >= IPSET_INVALID_ID)
1614		ip_set_max = IPSET_INVALID_ID - 1;
1615
1616	ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max,
1617			      GFP_KERNEL);
1618	if (!ip_set_list) {
1619		pr_err("ip_set: Unable to create ip_set_list\n");
1620		return -ENOMEM;
1621	}
1622
1623	ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
1624	if (ret != 0) {
1625		pr_err("ip_set: cannot register with nfnetlink.\n");
1626		kfree(ip_set_list);
1627		return ret;
1628	}
1629	ret = nf_register_sockopt(&so_set);
1630	if (ret != 0) {
1631		pr_err("SO_SET registry failed: %d\n", ret);
1632		nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1633		kfree(ip_set_list);
1634		return ret;
1635	}
1636
1637	pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
1638	return 0;
1639}
1640
1641static void __exit
1642ip_set_fini(void)
1643{
1644	/* There can't be any existing set */
1645	nf_unregister_sockopt(&so_set);
1646	nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1647	kfree(ip_set_list);
1648	pr_debug("these are the famous last words\n");
1649}
1650
/* Module entry and exit points */
module_init(ip_set_init);
module_exit(ip_set_fini);
1653