1/*
2 * net/sched/police.c	Input police filter.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 * 		J Hadi Salim (action changes)
11 */
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/kernel.h>
16#include <linux/string.h>
17#include <linux/errno.h>
18#include <linux/skbuff.h>
19#include <linux/rtnetlink.h>
20#include <linux/init.h>
21#include <linux/slab.h>
22#include <net/act_api.h>
23#include <net/netlink.h>
24
25struct tcf_police {
26	struct tcf_common	common;
27	int			tcfp_result;
28	u32			tcfp_ewma_rate;
29	s64			tcfp_burst;
30	u32			tcfp_mtu;
31	s64			tcfp_toks;
32	s64			tcfp_ptoks;
33	s64			tcfp_mtu_ptoks;
34	s64			tcfp_t_c;
35	struct psched_ratecfg	rate;
36	bool			rate_present;
37	struct psched_ratecfg	peak;
38	bool			peak_present;
39};
/* Map an embedded struct tcf_common pointer back to its struct tcf_police. */
#define to_police(pc)	container_of(pc, struct tcf_police, common)
42
43#define POL_TAB_MASK     15
44static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
45static u32 police_idx_gen;
46static DEFINE_RWLOCK(police_lock);
47
48static struct tcf_hashinfo police_hash_info = {
49	.htab	=	tcf_police_ht,
50	.hmask	=	POL_TAB_MASK,
51	.lock	=	&police_lock,
52};
53
54/* old policer structure from before tc actions */
55struct tc_police_compat {
56	u32			index;
57	int			action;
58	u32			limit;
59	u32			burst;
60	u32			mtu;
61	struct tc_ratespec	rate;
62	struct tc_ratespec	peakrate;
63};
64
65/* Each policer is serialized by its individual spinlock */
66
67static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
68			      int type, struct tc_action *a)
69{
70	struct tcf_common *p;
71	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
72	struct nlattr *nest;
73
74	read_lock_bh(&police_lock);
75
76	s_i = cb->args[0];
77
78	for (i = 0; i < (POL_TAB_MASK + 1); i++) {
79		p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)];
80
81		for (; p; p = p->tcfc_next) {
82			index++;
83			if (index < s_i)
84				continue;
85			a->priv = p;
86			a->order = index;
87			nest = nla_nest_start(skb, a->order);
88			if (nest == NULL)
89				goto nla_put_failure;
90			if (type == RTM_DELACTION)
91				err = tcf_action_dump_1(skb, a, 0, 1);
92			else
93				err = tcf_action_dump_1(skb, a, 0, 0);
94			if (err < 0) {
95				index--;
96				nla_nest_cancel(skb, nest);
97				goto done;
98			}
99			nla_nest_end(skb, nest);
100			n_i++;
101		}
102	}
103done:
104	read_unlock_bh(&police_lock);
105	if (n_i)
106		cb->args[0] += n_i;
107	return n_i;
108
109nla_put_failure:
110	nla_nest_cancel(skb, nest);
111	goto done;
112}
113
114static void tcf_police_destroy(struct tcf_police *p)
115{
116	unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
117	struct tcf_common **p1p;
118
119	for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
120		if (*p1p == &p->common) {
121			write_lock_bh(&police_lock);
122			*p1p = p->tcf_next;
123			write_unlock_bh(&police_lock);
124			gen_kill_estimator(&p->tcf_bstats,
125					   &p->tcf_rate_est);
126			/*
127			 * gen_estimator est_timer() might access p->tcf_lock
128			 * or bstats, wait a RCU grace period before freeing p
129			 */
130			kfree_rcu(p, tcf_rcu);
131			return;
132		}
133	}
134	WARN_ON(1);
135}
136
137static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
138	[TCA_POLICE_RATE]	= { .len = TC_RTAB_SIZE },
139	[TCA_POLICE_PEAKRATE]	= { .len = TC_RTAB_SIZE },
140	[TCA_POLICE_AVRATE]	= { .type = NLA_U32 },
141	[TCA_POLICE_RESULT]	= { .type = NLA_U32 },
142};
143
144static int tcf_act_police_locate(struct net *net, struct nlattr *nla,
145				 struct nlattr *est, struct tc_action *a,
146				 int ovr, int bind)
147{
148	unsigned int h;
149	int ret = 0, err;
150	struct nlattr *tb[TCA_POLICE_MAX + 1];
151	struct tc_police *parm;
152	struct tcf_police *police;
153	struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
154	int size;
155
156	if (nla == NULL)
157		return -EINVAL;
158
159	err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy);
160	if (err < 0)
161		return err;
162
163	if (tb[TCA_POLICE_TBF] == NULL)
164		return -EINVAL;
165	size = nla_len(tb[TCA_POLICE_TBF]);
166	if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
167		return -EINVAL;
168	parm = nla_data(tb[TCA_POLICE_TBF]);
169
170	if (parm->index) {
171		struct tcf_common *pc;
172
173		pc = tcf_hash_lookup(parm->index, &police_hash_info);
174		if (pc != NULL) {
175			a->priv = pc;
176			police = to_police(pc);
177			if (bind) {
178				police->tcf_bindcnt += 1;
179				police->tcf_refcnt += 1;
180			}
181			if (ovr)
182				goto override;
183			return ret;
184		}
185	}
186
187	police = kzalloc(sizeof(*police), GFP_KERNEL);
188	if (police == NULL)
189		return -ENOMEM;
190	ret = ACT_P_CREATED;
191	police->tcf_refcnt = 1;
192	spin_lock_init(&police->tcf_lock);
193	if (bind)
194		police->tcf_bindcnt = 1;
195override:
196	if (parm->rate.rate) {
197		err = -ENOMEM;
198		R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]);
199		if (R_tab == NULL)
200			goto failure;
201
202		if (parm->peakrate.rate) {
203			P_tab = qdisc_get_rtab(&parm->peakrate,
204					       tb[TCA_POLICE_PEAKRATE]);
205			if (P_tab == NULL)
206				goto failure;
207		}
208	}
209
210	spin_lock_bh(&police->tcf_lock);
211	if (est) {
212		err = gen_replace_estimator(&police->tcf_bstats,
213					    &police->tcf_rate_est,
214					    &police->tcf_lock, est);
215		if (err)
216			goto failure_unlock;
217	} else if (tb[TCA_POLICE_AVRATE] &&
218		   (ret == ACT_P_CREATED ||
219		    !gen_estimator_active(&police->tcf_bstats,
220					  &police->tcf_rate_est))) {
221		err = -EINVAL;
222		goto failure_unlock;
223	}
224
225	/* No failure allowed after this point */
226	police->tcfp_mtu = parm->mtu;
227	if (police->tcfp_mtu == 0) {
228		police->tcfp_mtu = ~0;
229		if (R_tab)
230			police->tcfp_mtu = 255 << R_tab->rate.cell_log;
231	}
232	if (R_tab) {
233		police->rate_present = true;
234		psched_ratecfg_precompute(&police->rate, &R_tab->rate);
235		qdisc_put_rtab(R_tab);
236	} else {
237		police->rate_present = false;
238	}
239	if (P_tab) {
240		police->peak_present = true;
241		psched_ratecfg_precompute(&police->peak, &P_tab->rate);
242		qdisc_put_rtab(P_tab);
243	} else {
244		police->peak_present = false;
245	}
246
247	if (tb[TCA_POLICE_RESULT])
248		police->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
249	police->tcfp_burst = PSCHED_TICKS2NS(parm->burst);
250	police->tcfp_toks = police->tcfp_burst;
251	if (police->peak_present) {
252		police->tcfp_mtu_ptoks = (s64) psched_l2t_ns(&police->peak,
253							     police->tcfp_mtu);
254		police->tcfp_ptoks = police->tcfp_mtu_ptoks;
255	}
256	police->tcf_action = parm->action;
257
258	if (tb[TCA_POLICE_AVRATE])
259		police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
260
261	spin_unlock_bh(&police->tcf_lock);
262	if (ret != ACT_P_CREATED)
263		return ret;
264
265	police->tcfp_t_c = ktime_to_ns(ktime_get());
266	police->tcf_index = parm->index ? parm->index :
267		tcf_hash_new_index(&police_idx_gen, &police_hash_info);
268	h = tcf_hash(police->tcf_index, POL_TAB_MASK);
269	write_lock_bh(&police_lock);
270	police->tcf_next = tcf_police_ht[h];
271	tcf_police_ht[h] = &police->common;
272	write_unlock_bh(&police_lock);
273
274	a->priv = police;
275	return ret;
276
277failure_unlock:
278	spin_unlock_bh(&police->tcf_lock);
279failure:
280	if (P_tab)
281		qdisc_put_rtab(P_tab);
282	if (R_tab)
283		qdisc_put_rtab(R_tab);
284	if (ret == ACT_P_CREATED)
285		kfree(police);
286	return err;
287}
288
289static int tcf_act_police_cleanup(struct tc_action *a, int bind)
290{
291	struct tcf_police *p = a->priv;
292	int ret = 0;
293
294	if (p != NULL) {
295		if (bind)
296			p->tcf_bindcnt--;
297
298		p->tcf_refcnt--;
299		if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) {
300			tcf_police_destroy(p);
301			ret = 1;
302		}
303	}
304	return ret;
305}
306
307static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a,
308			  struct tcf_result *res)
309{
310	struct tcf_police *police = a->priv;
311	s64 now;
312	s64 toks;
313	s64 ptoks = 0;
314
315	spin_lock(&police->tcf_lock);
316
317	bstats_update(&police->tcf_bstats, skb);
318
319	if (police->tcfp_ewma_rate &&
320	    police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
321		police->tcf_qstats.overlimits++;
322		if (police->tcf_action == TC_ACT_SHOT)
323			police->tcf_qstats.drops++;
324		spin_unlock(&police->tcf_lock);
325		return police->tcf_action;
326	}
327
328	if (qdisc_pkt_len(skb) <= police->tcfp_mtu) {
329		if (!police->rate_present) {
330			spin_unlock(&police->tcf_lock);
331			return police->tcfp_result;
332		}
333
334		now = ktime_to_ns(ktime_get());
335		toks = min_t(s64, now - police->tcfp_t_c,
336			     police->tcfp_burst);
337		if (police->peak_present) {
338			ptoks = toks + police->tcfp_ptoks;
339			if (ptoks > police->tcfp_mtu_ptoks)
340				ptoks = police->tcfp_mtu_ptoks;
341			ptoks -= (s64) psched_l2t_ns(&police->peak,
342						     qdisc_pkt_len(skb));
343		}
344		toks += police->tcfp_toks;
345		if (toks > police->tcfp_burst)
346			toks = police->tcfp_burst;
347		toks -= (s64) psched_l2t_ns(&police->rate, qdisc_pkt_len(skb));
348		if ((toks|ptoks) >= 0) {
349			police->tcfp_t_c = now;
350			police->tcfp_toks = toks;
351			police->tcfp_ptoks = ptoks;
352			spin_unlock(&police->tcf_lock);
353			return police->tcfp_result;
354		}
355	}
356
357	police->tcf_qstats.overlimits++;
358	if (police->tcf_action == TC_ACT_SHOT)
359		police->tcf_qstats.drops++;
360	spin_unlock(&police->tcf_lock);
361	return police->tcf_action;
362}
363
364static int
365tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
366{
367	unsigned char *b = skb_tail_pointer(skb);
368	struct tcf_police *police = a->priv;
369	struct tc_police opt = {
370		.index = police->tcf_index,
371		.action = police->tcf_action,
372		.mtu = police->tcfp_mtu,
373		.burst = PSCHED_NS2TICKS(police->tcfp_burst),
374		.refcnt = police->tcf_refcnt - ref,
375		.bindcnt = police->tcf_bindcnt - bind,
376	};
377
378	if (police->rate_present)
379		psched_ratecfg_getrate(&opt.rate, &police->rate);
380	if (police->peak_present)
381		psched_ratecfg_getrate(&opt.peakrate, &police->peak);
382	if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt))
383		goto nla_put_failure;
384	if (police->tcfp_result &&
385	    nla_put_u32(skb, TCA_POLICE_RESULT, police->tcfp_result))
386		goto nla_put_failure;
387	if (police->tcfp_ewma_rate &&
388	    nla_put_u32(skb, TCA_POLICE_AVRATE, police->tcfp_ewma_rate))
389		goto nla_put_failure;
390	return skb->len;
391
392nla_put_failure:
393	nlmsg_trim(skb, b);
394	return -1;
395}
396
397MODULE_AUTHOR("Alexey Kuznetsov");
398MODULE_DESCRIPTION("Policing actions");
399MODULE_LICENSE("GPL");
400
401static struct tc_action_ops act_police_ops = {
402	.kind		=	"police",
403	.hinfo		=	&police_hash_info,
404	.type		=	TCA_ID_POLICE,
405	.capab		=	TCA_CAP_NONE,
406	.owner		=	THIS_MODULE,
407	.act		=	tcf_act_police,
408	.dump		=	tcf_act_police_dump,
409	.cleanup	=	tcf_act_police_cleanup,
410	.lookup		=	tcf_hash_search,
411	.init		=	tcf_act_police_locate,
412	.walk		=	tcf_act_police_walker
413};
414
415static int __init
416police_init_module(void)
417{
418	return tcf_register_action(&act_police_ops);
419}
420
421static void __exit
422police_cleanup_module(void)
423{
424	tcf_unregister_action(&act_police_ops);
425}
426
427module_init(police_init_module);
428module_exit(police_cleanup_module);
429