cls_cgroup.c revision 8a8e04df4747661daaee77e98e102d99c9e09b98
/*
 * net/sched/cls_cgroup.c	Control Group Classifier
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Thomas Graf <tgraf@suug.ch>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/cgroup.h>
#include <linux/rcupdate.h>
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>
#include <net/cls_cgroup.h>

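/*
 * Accessors mapping a cgroup or task to its net_cls state: the cgroup
 * core hands back the embedded cgroup_subsys_state, and container_of()
 * recovers the enclosing cgroup_cls_state.
 */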
static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp)
{
	return container_of(cgroup_subsys_state(cgrp, net_cls_subsys_id),
			    struct cgroup_cls_state, css);
}

static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p)
{
	return container_of(task_subsys_state(p, net_cls_subsys_id),
			    struct cgroup_cls_state, css);
}

static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
{
	struct cgroup_cls_state *cs;

	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
	if (!cs)
		return ERR_PTR(-ENOMEM);

	if (cgrp->parent)
		cs->classid = cgrp_cls_state(cgrp->parent)->classid;

	return &cs->css;
}

static void cgrp_destroy(struct cgroup *cgrp)
{
	kfree(cgrp_cls_state(cgrp));
}

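/*
 * The classid is exposed to userspace as the "net_cls.classid" cgroup
 * file.  tc interprets the value as a major:minor class handle with the
 * major number in the upper 16 bits; e.g. writing 0x00100001 steers the
 * cgroup's traffic to class 10:1.
 */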
static u64 read_classid(struct cgroup *cgrp, struct cftype *cft)
{
	return cgrp_cls_state(cgrp)->classid;
}

static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value)
{
	cgrp_cls_state(cgrp)->classid = (u32) value;
	return 0;
}

static struct cftype ss_files[] = {
	{
		.name = "classid",
		.read_u64 = read_classid,
		.write_u64 = write_classid,
	},
	{ }	/* terminate */
};

struct cgroup_subsys net_cls_subsys = {
	.name		= "net_cls",
	.create		= cgrp_create,
	.destroy	= cgrp_destroy,
	.subsys_id	= net_cls_subsys_id,
	.base_cftypes	= ss_files,
	.module		= THIS_MODULE,
};
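
/*
 * Typical usage, sketched with hypothetical device and mount point names
 * (the exact tc syntax may differ between iproute2 versions):
 *
 *	# mkdir /sys/fs/cgroup/net_cls/foo
 *	# echo 0x100001 > /sys/fs/cgroup/net_cls/foo/net_cls.classid
 *	# tc qdisc add dev eth0 root handle 10: htb
 *	# tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit
 *	# tc filter add dev eth0 parent 10: protocol ip prio 10 \
 *		handle 1: cgroup
 *
 * Traffic from tasks placed in the "foo" cgroup is then matched by the
 * cgroup filter and assigned to class 10:1.
 */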

struct cls_cgroup_head {
	u32			handle;
	struct tcf_exts		exts;
	struct tcf_ematch_tree	ematches;
};

static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			       struct tcf_result *res)
{
	struct cls_cgroup_head *head = tp->root;
	u32 classid;

	rcu_read_lock();
	classid = task_cls_state(current)->classid;
	rcu_read_unlock();

	/*
	 * Due to the nature of the classifier it is not possible to trust
	 * `current' for packets originating from softirq context, as the
	 * interrupted task is unrelated to the packet being sent.
	 *
	 * in_serving_softirq() reports true only while a softirq handler is
	 * actually running; merely having bottom halves disabled does not
	 * count.  For such packets the classid cached on the socket, if
	 * any, is used instead.
	 */
	if (in_serving_softirq()) {
		/* If there is an sk_classid we'll use that. */
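		/* sk->sk_classid is maintained by sock_update_classid()
		 * in net/core/sock.c, which copies the owning task's
		 * classid into the socket; the exact call sites vary
		 * across kernel versions.
		 */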
		if (!skb->sk)
			return -1;
		classid = skb->sk->sk_classid;
	}

	if (!classid)
		return -1;

	if (!tcf_em_tree_match(skb, &head->ematches, NULL))
		return -1;

	res->classid = classid;
	res->class = 0;
	return tcf_exts_exec(skb, &head->exts, res);
}

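/*
 * The cgroup classifier keeps exactly one filter per tcf_proto (the head
 * at tp->root), so the per-element operations are trivial: lookups
 * return no element and deletion is rejected with -EOPNOTSUPP.
 */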
static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle)
{
	return 0UL;
}

static void cls_cgroup_put(struct tcf_proto *tp, unsigned long f)
{
}

static int cls_cgroup_init(struct tcf_proto *tp)
{
	return 0;
}

static const struct tcf_ext_map cgroup_ext_map = {
	.action = TCA_CGROUP_ACT,
	.police = TCA_CGROUP_POLICE,
};

static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
	[TCA_CGROUP_EMATCHES]	= { .type = NLA_NESTED },
};

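/*
 * Create or update the single filter instance.  The first change request
 * allocates the head and installs it under the tc tree lock; later
 * requests must address the same handle and replace the extensions and
 * ematch tree in place.
 */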
static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
			     u32 handle, struct nlattr **tca,
			     unsigned long *arg)
{
	struct nlattr *tb[TCA_CGROUP_MAX + 1];
	struct cls_cgroup_head *head = tp->root;
	struct tcf_ematch_tree t;
	struct tcf_exts e;
	int err;

	if (!tca[TCA_OPTIONS])
		return -EINVAL;

	if (head == NULL) {
		if (!handle)
			return -EINVAL;

		head = kzalloc(sizeof(*head), GFP_KERNEL);
		if (head == NULL)
			return -ENOBUFS;

		head->handle = handle;

		tcf_tree_lock(tp);
		tp->root = head;
		tcf_tree_unlock(tp);
	}

	if (handle != head->handle)
		return -ENOENT;

	err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
			       cgroup_policy);
	if (err < 0)
		return err;

	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map);
	if (err < 0)
		return err;

	err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
	if (err < 0) {
		/* don't leak the extensions validated above */
		tcf_exts_destroy(tp, &e);
		return err;
	}

	tcf_exts_change(tp, &head->exts, &e);
	tcf_em_tree_change(tp, &head->ematches, &t);

	return 0;
}

static void cls_cgroup_destroy(struct tcf_proto *tp)
{
	struct cls_cgroup_head *head = tp->root;

	if (head) {
		tcf_exts_destroy(tp, &head->exts);
		tcf_em_tree_destroy(tp, &head->ematches);
		kfree(head);
	}
}

static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
{
	return -EOPNOTSUPP;
}

static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
	struct cls_cgroup_head *head = tp->root;

	if (arg->count < arg->skip)
		goto skip;

	if (arg->fn(tp, (unsigned long) head, arg) < 0) {
		arg->stop = 1;
		return;
	}
skip:
	arg->count++;
}

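/*
 * Dump the filter to netlink: the handle goes into the tc message, and a
 * nested TCA_OPTIONS attribute carries the extensions and ematch tree.
 */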
static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh,
			   struct sk_buff *skb, struct tcmsg *t)
{
	struct cls_cgroup_head *head = tp->root;
	unsigned char *b = skb_tail_pointer(skb);
	struct nlattr *nest;

	t->tcm_handle = head->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 ||
	    tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0)
		goto nla_put_failure;

	nla_nest_end(skb, nest);

	if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0)
		goto nla_put_failure;

	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
	.kind		=	"cgroup",
	.init		=	cls_cgroup_init,
	.change		=	cls_cgroup_change,
	.classify	=	cls_cgroup_classify,
	.destroy	=	cls_cgroup_destroy,
	.get		=	cls_cgroup_get,
	.put		=	cls_cgroup_put,
	.delete		=	cls_cgroup_delete,
	.walk		=	cls_cgroup_walk,
	.dump		=	cls_cgroup_dump,
	.owner		=	THIS_MODULE,
};

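/*
 * Module init/exit: the net_cls cgroup subsystem is registered before
 * the classifier so that no "cgroup" filter can exist without it, and
 * it is unloaded last for the same reason.
 */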
static int __init init_cgroup_cls(void)
{
	int ret;

	ret = cgroup_load_subsys(&net_cls_subsys);
	if (ret)
		goto out;

	ret = register_tcf_proto_ops(&cls_cgroup_ops);
	if (ret)
		cgroup_unload_subsys(&net_cls_subsys);

out:
	return ret;
}

static void __exit exit_cgroup_cls(void)
{
	unregister_tcf_proto_ops(&cls_cgroup_ops);

	cgroup_unload_subsys(&net_cls_subsys);
}

module_init(init_cgroup_cls);
module_exit(exit_cgroup_cls);
MODULE_LICENSE("GPL");