1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/list.h>
15#include <linux/rbtree.h>
16#include <linux/netlink.h>
17#include <linux/netfilter.h>
18#include <linux/netfilter/nf_tables.h>
19#include <net/netfilter/nf_tables.h>
20
21static DEFINE_SPINLOCK(nft_rbtree_lock);
22
/* Per-set private data: the root of the element tree. All tree accesses
 * in this file are serialized by the global nft_rbtree_lock.
 */
struct nft_rbtree {
	struct rb_root		root;
};
26
/* A single set element.
 *
 * @node:  rb-tree linkage.
 * @flags: element flags, e.g. NFT_SET_ELEM_INTERVAL_END.
 * @key:   lookup key, compared with nft_data_cmp() over set->klen bytes.
 * @data:  flexible array member holding the mapped data; only allocated
 *         (see nft_rbtree_insert()) for map sets on non-interval-end
 *         elements.
 */
struct nft_rbtree_elem {
	struct rb_node		node;
	u16			flags;
	struct nft_data		key;
	struct nft_data		data[];
};
33
34static bool nft_rbtree_lookup(const struct nft_set *set,
35			      const struct nft_data *key,
36			      struct nft_data *data)
37{
38	const struct nft_rbtree *priv = nft_set_priv(set);
39	const struct nft_rbtree_elem *rbe, *interval = NULL;
40	const struct rb_node *parent = priv->root.rb_node;
41	int d;
42
43	spin_lock_bh(&nft_rbtree_lock);
44	while (parent != NULL) {
45		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
46
47		d = nft_data_cmp(&rbe->key, key, set->klen);
48		if (d < 0) {
49			parent = parent->rb_left;
50			interval = rbe;
51		} else if (d > 0)
52			parent = parent->rb_right;
53		else {
54found:
55			if (rbe->flags & NFT_SET_ELEM_INTERVAL_END)
56				goto out;
57			if (set->flags & NFT_SET_MAP)
58				nft_data_copy(data, rbe->data);
59
60			spin_unlock_bh(&nft_rbtree_lock);
61			return true;
62		}
63	}
64
65	if (set->flags & NFT_SET_INTERVAL && interval != NULL) {
66		rbe = interval;
67		goto found;
68	}
69out:
70	spin_unlock_bh(&nft_rbtree_lock);
71	return false;
72}
73
74static void nft_rbtree_elem_destroy(const struct nft_set *set,
75				    struct nft_rbtree_elem *rbe)
76{
77	nft_data_uninit(&rbe->key, NFT_DATA_VALUE);
78	if (set->flags & NFT_SET_MAP &&
79	    !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
80		nft_data_uninit(rbe->data, set->dtype);
81
82	kfree(rbe);
83}
84
85static int __nft_rbtree_insert(const struct nft_set *set,
86			       struct nft_rbtree_elem *new)
87{
88	struct nft_rbtree *priv = nft_set_priv(set);
89	struct nft_rbtree_elem *rbe;
90	struct rb_node *parent, **p;
91	int d;
92
93	parent = NULL;
94	p = &priv->root.rb_node;
95	while (*p != NULL) {
96		parent = *p;
97		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
98		d = nft_data_cmp(&rbe->key, &new->key, set->klen);
99		if (d < 0)
100			p = &parent->rb_left;
101		else if (d > 0)
102			p = &parent->rb_right;
103		else
104			return -EEXIST;
105	}
106	rb_link_node(&new->node, parent, p);
107	rb_insert_color(&new->node, &priv->root);
108	return 0;
109}
110
111static int nft_rbtree_insert(const struct nft_set *set,
112			     const struct nft_set_elem *elem)
113{
114	struct nft_rbtree_elem *rbe;
115	unsigned int size;
116	int err;
117
118	size = sizeof(*rbe);
119	if (set->flags & NFT_SET_MAP &&
120	    !(elem->flags & NFT_SET_ELEM_INTERVAL_END))
121		size += sizeof(rbe->data[0]);
122
123	rbe = kzalloc(size, GFP_KERNEL);
124	if (rbe == NULL)
125		return -ENOMEM;
126
127	rbe->flags = elem->flags;
128	nft_data_copy(&rbe->key, &elem->key);
129	if (set->flags & NFT_SET_MAP &&
130	    !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
131		nft_data_copy(rbe->data, &elem->data);
132
133	spin_lock_bh(&nft_rbtree_lock);
134	err = __nft_rbtree_insert(set, rbe);
135	if (err < 0)
136		kfree(rbe);
137
138	spin_unlock_bh(&nft_rbtree_lock);
139	return err;
140}
141
142static void nft_rbtree_remove(const struct nft_set *set,
143			      const struct nft_set_elem *elem)
144{
145	struct nft_rbtree *priv = nft_set_priv(set);
146	struct nft_rbtree_elem *rbe = elem->cookie;
147
148	spin_lock_bh(&nft_rbtree_lock);
149	rb_erase(&rbe->node, &priv->root);
150	spin_unlock_bh(&nft_rbtree_lock);
151	kfree(rbe);
152}
153
154static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
155{
156	const struct nft_rbtree *priv = nft_set_priv(set);
157	const struct rb_node *parent = priv->root.rb_node;
158	struct nft_rbtree_elem *rbe;
159	int d;
160
161	spin_lock_bh(&nft_rbtree_lock);
162	while (parent != NULL) {
163		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
164
165		d = nft_data_cmp(&rbe->key, &elem->key, set->klen);
166		if (d < 0)
167			parent = parent->rb_left;
168		else if (d > 0)
169			parent = parent->rb_right;
170		else {
171			elem->cookie = rbe;
172			if (set->flags & NFT_SET_MAP &&
173			    !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
174				nft_data_copy(&elem->data, rbe->data);
175			elem->flags = rbe->flags;
176			spin_unlock_bh(&nft_rbtree_lock);
177			return 0;
178		}
179	}
180	spin_unlock_bh(&nft_rbtree_lock);
181	return -ENOENT;
182}
183
184static void nft_rbtree_walk(const struct nft_ctx *ctx,
185			    const struct nft_set *set,
186			    struct nft_set_iter *iter)
187{
188	const struct nft_rbtree *priv = nft_set_priv(set);
189	const struct nft_rbtree_elem *rbe;
190	struct nft_set_elem elem;
191	struct rb_node *node;
192
193	spin_lock_bh(&nft_rbtree_lock);
194	for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
195		if (iter->count < iter->skip)
196			goto cont;
197
198		rbe = rb_entry(node, struct nft_rbtree_elem, node);
199		nft_data_copy(&elem.key, &rbe->key);
200		if (set->flags & NFT_SET_MAP &&
201		    !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
202			nft_data_copy(&elem.data, rbe->data);
203		elem.flags = rbe->flags;
204
205		iter->err = iter->fn(ctx, set, iter, &elem);
206		if (iter->err < 0) {
207			spin_unlock_bh(&nft_rbtree_lock);
208			return;
209		}
210cont:
211		iter->count++;
212	}
213	spin_unlock_bh(&nft_rbtree_lock);
214}
215
/* Size of the per-set private area (struct nft_rbtree). */
static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
{
	return sizeof(struct nft_rbtree);
}
220
/* Initialize the per-set private area: start with an empty tree. */
static int nft_rbtree_init(const struct nft_set *set,
			   const struct nft_set_desc *desc,
			   const struct nlattr * const nla[])
{
	struct nft_rbtree *priv = nft_set_priv(set);

	priv->root = RB_ROOT;
	return 0;
}
230
231static void nft_rbtree_destroy(const struct nft_set *set)
232{
233	struct nft_rbtree *priv = nft_set_priv(set);
234	struct nft_rbtree_elem *rbe;
235	struct rb_node *node;
236
237	while ((node = priv->root.rb_node) != NULL) {
238		rb_erase(node, &priv->root);
239		rbe = rb_entry(node, struct nft_rbtree_elem, node);
240		nft_rbtree_elem_destroy(set, rbe);
241	}
242}
243
244static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
245				struct nft_set_estimate *est)
246{
247	unsigned int nsize;
248
249	nsize = sizeof(struct nft_rbtree_elem);
250	if (features & NFT_SET_MAP)
251		nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]);
252
253	if (desc->size)
254		est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
255	else
256		est->size = nsize;
257
258	est->class = NFT_SET_CLASS_O_LOG_N;
259
260	return true;
261}
262
/* rb-tree set backend: ordered storage advertising interval and map
 * support to the nf_tables core.
 */
static struct nft_set_ops nft_rbtree_ops __read_mostly = {
	.privsize	= nft_rbtree_privsize,
	.estimate	= nft_rbtree_estimate,
	.init		= nft_rbtree_init,
	.destroy	= nft_rbtree_destroy,
	.insert		= nft_rbtree_insert,
	.remove		= nft_rbtree_remove,
	.get		= nft_rbtree_get,
	.lookup		= nft_rbtree_lookup,
	.walk		= nft_rbtree_walk,
	.features	= NFT_SET_INTERVAL | NFT_SET_MAP,
	.owner		= THIS_MODULE,
};
276
/* Register the rb-tree backend with the nf_tables set core. */
static int __init nft_rbtree_module_init(void)
{
	return nft_register_set(&nft_rbtree_ops);
}
281
/* Unregister the backend on module unload. */
static void __exit nft_rbtree_module_exit(void)
{
	nft_unregister_set(&nft_rbtree_ops);
}
286
module_init(nft_rbtree_module_init);
module_exit(nft_rbtree_module_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
/* Autoload when a set without an explicit type request is created. */
MODULE_ALIAS_NFT_SET();
293