1/*
2 * net/sched/sch_drr.c         Deficit Round Robin scheduler
3 *
4 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/slab.h>
13#include <linux/init.h>
14#include <linux/errno.h>
15#include <linux/netdevice.h>
16#include <linux/pkt_sched.h>
17#include <net/sch_generic.h>
18#include <net/pkt_sched.h>
19#include <net/pkt_cls.h>
20
21struct drr_class {
22	struct Qdisc_class_common	common;
23	unsigned int			refcnt;
24	unsigned int			filter_cnt;
25
26	struct gnet_stats_basic_packed		bstats;
27	struct gnet_stats_queue		qstats;
28	struct gnet_stats_rate_est	rate_est;
29	struct list_head		alist;
30	struct Qdisc			*qdisc;
31
32	u32				quantum;
33	u32				deficit;
34};
35
36struct drr_sched {
37	struct list_head		active;
38	struct tcf_proto		*filter_list;
39	struct Qdisc_class_hash		clhash;
40};
41
42static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
43{
44	struct drr_sched *q = qdisc_priv(sch);
45	struct Qdisc_class_common *clc;
46
47	clc = qdisc_class_find(&q->clhash, classid);
48	if (clc == NULL)
49		return NULL;
50	return container_of(clc, struct drr_class, common);
51}
52
53static void drr_purge_queue(struct drr_class *cl)
54{
55	unsigned int len = cl->qdisc->q.qlen;
56
57	qdisc_reset(cl->qdisc);
58	qdisc_tree_decrease_qlen(cl->qdisc, len);
59}
60
61static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
62	[TCA_DRR_QUANTUM]	= { .type = NLA_U32 },
63};
64
65static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
66			    struct nlattr **tca, unsigned long *arg)
67{
68	struct drr_sched *q = qdisc_priv(sch);
69	struct drr_class *cl = (struct drr_class *)*arg;
70	struct nlattr *opt = tca[TCA_OPTIONS];
71	struct nlattr *tb[TCA_DRR_MAX + 1];
72	u32 quantum;
73	int err;
74
75	if (!opt)
76		return -EINVAL;
77
78	err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy);
79	if (err < 0)
80		return err;
81
82	if (tb[TCA_DRR_QUANTUM]) {
83		quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]);
84		if (quantum == 0)
85			return -EINVAL;
86	} else
87		quantum = psched_mtu(qdisc_dev(sch));
88
89	if (cl != NULL) {
90		if (tca[TCA_RATE]) {
91			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
92						    qdisc_root_sleeping_lock(sch),
93						    tca[TCA_RATE]);
94			if (err)
95				return err;
96		}
97
98		sch_tree_lock(sch);
99		if (tb[TCA_DRR_QUANTUM])
100			cl->quantum = quantum;
101		sch_tree_unlock(sch);
102
103		return 0;
104	}
105
106	cl = kzalloc(sizeof(struct drr_class), GFP_KERNEL);
107	if (cl == NULL)
108		return -ENOBUFS;
109
110	cl->refcnt	   = 1;
111	cl->common.classid = classid;
112	cl->quantum	   = quantum;
113	cl->qdisc	   = qdisc_create_dflt(sch->dev_queue,
114					       &pfifo_qdisc_ops, classid);
115	if (cl->qdisc == NULL)
116		cl->qdisc = &noop_qdisc;
117
118	if (tca[TCA_RATE]) {
119		err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
120					    qdisc_root_sleeping_lock(sch),
121					    tca[TCA_RATE]);
122		if (err) {
123			qdisc_destroy(cl->qdisc);
124			kfree(cl);
125			return err;
126		}
127	}
128
129	sch_tree_lock(sch);
130	qdisc_class_hash_insert(&q->clhash, &cl->common);
131	sch_tree_unlock(sch);
132
133	qdisc_class_hash_grow(sch, &q->clhash);
134
135	*arg = (unsigned long)cl;
136	return 0;
137}
138
139static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl)
140{
141	gen_kill_estimator(&cl->bstats, &cl->rate_est);
142	qdisc_destroy(cl->qdisc);
143	kfree(cl);
144}
145
146static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
147{
148	struct drr_sched *q = qdisc_priv(sch);
149	struct drr_class *cl = (struct drr_class *)arg;
150
151	if (cl->filter_cnt > 0)
152		return -EBUSY;
153
154	sch_tree_lock(sch);
155
156	drr_purge_queue(cl);
157	qdisc_class_hash_remove(&q->clhash, &cl->common);
158
159	BUG_ON(--cl->refcnt == 0);
160	/*
161	 * This shouldn't happen: we "hold" one cops->get() when called
162	 * from tc_ctl_tclass; the destroy method is done from cops->put().
163	 */
164
165	sch_tree_unlock(sch);
166	return 0;
167}
168
169static unsigned long drr_get_class(struct Qdisc *sch, u32 classid)
170{
171	struct drr_class *cl = drr_find_class(sch, classid);
172
173	if (cl != NULL)
174		cl->refcnt++;
175
176	return (unsigned long)cl;
177}
178
179static void drr_put_class(struct Qdisc *sch, unsigned long arg)
180{
181	struct drr_class *cl = (struct drr_class *)arg;
182
183	if (--cl->refcnt == 0)
184		drr_destroy_class(sch, cl);
185}
186
187static struct tcf_proto **drr_tcf_chain(struct Qdisc *sch, unsigned long cl)
188{
189	struct drr_sched *q = qdisc_priv(sch);
190
191	if (cl)
192		return NULL;
193
194	return &q->filter_list;
195}
196
197static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent,
198				  u32 classid)
199{
200	struct drr_class *cl = drr_find_class(sch, classid);
201
202	if (cl != NULL)
203		cl->filter_cnt++;
204
205	return (unsigned long)cl;
206}
207
208static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg)
209{
210	struct drr_class *cl = (struct drr_class *)arg;
211
212	cl->filter_cnt--;
213}
214
215static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
216			   struct Qdisc *new, struct Qdisc **old)
217{
218	struct drr_class *cl = (struct drr_class *)arg;
219
220	if (new == NULL) {
221		new = qdisc_create_dflt(sch->dev_queue,
222					&pfifo_qdisc_ops, cl->common.classid);
223		if (new == NULL)
224			new = &noop_qdisc;
225	}
226
227	sch_tree_lock(sch);
228	drr_purge_queue(cl);
229	*old = cl->qdisc;
230	cl->qdisc = new;
231	sch_tree_unlock(sch);
232	return 0;
233}
234
235static struct Qdisc *drr_class_leaf(struct Qdisc *sch, unsigned long arg)
236{
237	struct drr_class *cl = (struct drr_class *)arg;
238
239	return cl->qdisc;
240}
241
242static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg)
243{
244	struct drr_class *cl = (struct drr_class *)arg;
245
246	if (cl->qdisc->q.qlen == 0)
247		list_del(&cl->alist);
248}
249
250static int drr_dump_class(struct Qdisc *sch, unsigned long arg,
251			  struct sk_buff *skb, struct tcmsg *tcm)
252{
253	struct drr_class *cl = (struct drr_class *)arg;
254	struct nlattr *nest;
255
256	tcm->tcm_parent	= TC_H_ROOT;
257	tcm->tcm_handle	= cl->common.classid;
258	tcm->tcm_info	= cl->qdisc->handle;
259
260	nest = nla_nest_start(skb, TCA_OPTIONS);
261	if (nest == NULL)
262		goto nla_put_failure;
263	if (nla_put_u32(skb, TCA_DRR_QUANTUM, cl->quantum))
264		goto nla_put_failure;
265	return nla_nest_end(skb, nest);
266
267nla_put_failure:
268	nla_nest_cancel(skb, nest);
269	return -EMSGSIZE;
270}
271
272static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
273				struct gnet_dump *d)
274{
275	struct drr_class *cl = (struct drr_class *)arg;
276	struct tc_drr_stats xstats;
277
278	memset(&xstats, 0, sizeof(xstats));
279	if (cl->qdisc->q.qlen) {
280		xstats.deficit = cl->deficit;
281		cl->qdisc->qstats.qlen = cl->qdisc->q.qlen;
282	}
283
284	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
285	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
286	    gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
287		return -1;
288
289	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
290}
291
292static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg)
293{
294	struct drr_sched *q = qdisc_priv(sch);
295	struct drr_class *cl;
296	unsigned int i;
297
298	if (arg->stop)
299		return;
300
301	for (i = 0; i < q->clhash.hashsize; i++) {
302		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
303			if (arg->count < arg->skip) {
304				arg->count++;
305				continue;
306			}
307			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
308				arg->stop = 1;
309				return;
310			}
311			arg->count++;
312		}
313	}
314}
315
316static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
317				      int *qerr)
318{
319	struct drr_sched *q = qdisc_priv(sch);
320	struct drr_class *cl;
321	struct tcf_result res;
322	int result;
323
324	if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
325		cl = drr_find_class(sch, skb->priority);
326		if (cl != NULL)
327			return cl;
328	}
329
330	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
331	result = tc_classify(skb, q->filter_list, &res);
332	if (result >= 0) {
333#ifdef CONFIG_NET_CLS_ACT
334		switch (result) {
335		case TC_ACT_QUEUED:
336		case TC_ACT_STOLEN:
337			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
338		case TC_ACT_SHOT:
339			return NULL;
340		}
341#endif
342		cl = (struct drr_class *)res.class;
343		if (cl == NULL)
344			cl = drr_find_class(sch, res.classid);
345		return cl;
346	}
347	return NULL;
348}
349
350static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
351{
352	struct drr_sched *q = qdisc_priv(sch);
353	struct drr_class *cl;
354	int err = 0;
355
356	cl = drr_classify(skb, sch, &err);
357	if (cl == NULL) {
358		if (err & __NET_XMIT_BYPASS)
359			sch->qstats.drops++;
360		kfree_skb(skb);
361		return err;
362	}
363
364	err = qdisc_enqueue(skb, cl->qdisc);
365	if (unlikely(err != NET_XMIT_SUCCESS)) {
366		if (net_xmit_drop_count(err)) {
367			cl->qstats.drops++;
368			sch->qstats.drops++;
369		}
370		return err;
371	}
372
373	if (cl->qdisc->q.qlen == 1) {
374		list_add_tail(&cl->alist, &q->active);
375		cl->deficit = cl->quantum;
376	}
377
378	sch->q.qlen++;
379	return err;
380}
381
382static struct sk_buff *drr_dequeue(struct Qdisc *sch)
383{
384	struct drr_sched *q = qdisc_priv(sch);
385	struct drr_class *cl;
386	struct sk_buff *skb;
387	unsigned int len;
388
389	if (list_empty(&q->active))
390		goto out;
391	while (1) {
392		cl = list_first_entry(&q->active, struct drr_class, alist);
393		skb = cl->qdisc->ops->peek(cl->qdisc);
394		if (skb == NULL)
395			goto out;
396
397		len = qdisc_pkt_len(skb);
398		if (len <= cl->deficit) {
399			cl->deficit -= len;
400			skb = qdisc_dequeue_peeked(cl->qdisc);
401			if (cl->qdisc->q.qlen == 0)
402				list_del(&cl->alist);
403
404			bstats_update(&cl->bstats, skb);
405			qdisc_bstats_update(sch, skb);
406			sch->q.qlen--;
407			return skb;
408		}
409
410		cl->deficit += cl->quantum;
411		list_move_tail(&cl->alist, &q->active);
412	}
413out:
414	return NULL;
415}
416
417static unsigned int drr_drop(struct Qdisc *sch)
418{
419	struct drr_sched *q = qdisc_priv(sch);
420	struct drr_class *cl;
421	unsigned int len;
422
423	list_for_each_entry(cl, &q->active, alist) {
424		if (cl->qdisc->ops->drop) {
425			len = cl->qdisc->ops->drop(cl->qdisc);
426			if (len > 0) {
427				sch->q.qlen--;
428				if (cl->qdisc->q.qlen == 0)
429					list_del(&cl->alist);
430				return len;
431			}
432		}
433	}
434	return 0;
435}
436
437static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
438{
439	struct drr_sched *q = qdisc_priv(sch);
440	int err;
441
442	err = qdisc_class_hash_init(&q->clhash);
443	if (err < 0)
444		return err;
445	INIT_LIST_HEAD(&q->active);
446	return 0;
447}
448
449static void drr_reset_qdisc(struct Qdisc *sch)
450{
451	struct drr_sched *q = qdisc_priv(sch);
452	struct drr_class *cl;
453	unsigned int i;
454
455	for (i = 0; i < q->clhash.hashsize; i++) {
456		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
457			if (cl->qdisc->q.qlen)
458				list_del(&cl->alist);
459			qdisc_reset(cl->qdisc);
460		}
461	}
462	sch->q.qlen = 0;
463}
464
465static void drr_destroy_qdisc(struct Qdisc *sch)
466{
467	struct drr_sched *q = qdisc_priv(sch);
468	struct drr_class *cl;
469	struct hlist_node *next;
470	unsigned int i;
471
472	tcf_destroy_chain(&q->filter_list);
473
474	for (i = 0; i < q->clhash.hashsize; i++) {
475		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
476					  common.hnode)
477			drr_destroy_class(sch, cl);
478	}
479	qdisc_class_hash_destroy(&q->clhash);
480}
481
482static const struct Qdisc_class_ops drr_class_ops = {
483	.change		= drr_change_class,
484	.delete		= drr_delete_class,
485	.get		= drr_get_class,
486	.put		= drr_put_class,
487	.tcf_chain	= drr_tcf_chain,
488	.bind_tcf	= drr_bind_tcf,
489	.unbind_tcf	= drr_unbind_tcf,
490	.graft		= drr_graft_class,
491	.leaf		= drr_class_leaf,
492	.qlen_notify	= drr_qlen_notify,
493	.dump		= drr_dump_class,
494	.dump_stats	= drr_dump_class_stats,
495	.walk		= drr_walk,
496};
497
498static struct Qdisc_ops drr_qdisc_ops __read_mostly = {
499	.cl_ops		= &drr_class_ops,
500	.id		= "drr",
501	.priv_size	= sizeof(struct drr_sched),
502	.enqueue	= drr_enqueue,
503	.dequeue	= drr_dequeue,
504	.peek		= qdisc_peek_dequeued,
505	.drop		= drr_drop,
506	.init		= drr_init_qdisc,
507	.reset		= drr_reset_qdisc,
508	.destroy	= drr_destroy_qdisc,
509	.owner		= THIS_MODULE,
510};
511
512static int __init drr_init(void)
513{
514	return register_qdisc(&drr_qdisc_ops);
515}
516
517static void __exit drr_exit(void)
518{
519	unregister_qdisc(&drr_qdisc_ops);
520}
521
522module_init(drr_init);
523module_exit(drr_exit);
524MODULE_LICENSE("GPL");
525