1/*
2 * net/sched/sch_prio.c	Simple 3-band priority "scheduler".
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 * Fixes:       19990609: J Hadi Salim <hadi@nortelnetworks.com>:
11 *              Init --  EINVAL when opt undefined
12 */
13
14#include <linux/module.h>
15#include <linux/slab.h>
16#include <linux/types.h>
17#include <linux/kernel.h>
18#include <linux/string.h>
19#include <linux/errno.h>
20#include <linux/skbuff.h>
21#include <net/netlink.h>
22#include <net/pkt_sched.h>
23
24
/*
 * Private state of one prio qdisc instance; its size is what
 * prio_qdisc_ops.priv_size reports.
 */
struct prio_sched_data {
	/* Number of bands in use; prio_tune() keeps it within 2..TCQ_PRIO_BANDS. */
	int bands;
	/* Classifier chain passed to tc_classify() by prio_classify(). */
	struct tcf_proto *filter_list;
	/* Maps (priority & TC_PRIO_MAX) to a band index into queues[]. */
	u8  prio2band[TC_PRIO_MAX+1];
	/* Per-band child qdiscs; prio_init() fills every slot with &noop_qdisc. */
	struct Qdisc *queues[TCQ_PRIO_BANDS];
};
31
32
33static struct Qdisc *
34prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
35{
36	struct prio_sched_data *q = qdisc_priv(sch);
37	u32 band = skb->priority;
38	struct tcf_result res;
39	int err;
40
41	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
42	if (TC_H_MAJ(skb->priority) != sch->handle) {
43		err = tc_classify(skb, q->filter_list, &res);
44#ifdef CONFIG_NET_CLS_ACT
45		switch (err) {
46		case TC_ACT_STOLEN:
47		case TC_ACT_QUEUED:
48			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
49		case TC_ACT_SHOT:
50			return NULL;
51		}
52#endif
53		if (!q->filter_list || err < 0) {
54			if (TC_H_MAJ(band))
55				band = 0;
56			return q->queues[q->prio2band[band & TC_PRIO_MAX]];
57		}
58		band = res.classid;
59	}
60	band = TC_H_MIN(band) - 1;
61	if (band >= q->bands)
62		return q->queues[q->prio2band[0]];
63
64	return q->queues[band];
65}
66
67static int
68prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
69{
70	struct Qdisc *qdisc;
71	int ret;
72
73	qdisc = prio_classify(skb, sch, &ret);
74#ifdef CONFIG_NET_CLS_ACT
75	if (qdisc == NULL) {
76
77		if (ret & __NET_XMIT_BYPASS)
78			sch->qstats.drops++;
79		kfree_skb(skb);
80		return ret;
81	}
82#endif
83
84	ret = qdisc_enqueue(skb, qdisc);
85	if (ret == NET_XMIT_SUCCESS) {
86		sch->q.qlen++;
87		return NET_XMIT_SUCCESS;
88	}
89	if (net_xmit_drop_count(ret))
90		sch->qstats.drops++;
91	return ret;
92}
93
94static struct sk_buff *prio_peek(struct Qdisc *sch)
95{
96	struct prio_sched_data *q = qdisc_priv(sch);
97	int prio;
98
99	for (prio = 0; prio < q->bands; prio++) {
100		struct Qdisc *qdisc = q->queues[prio];
101		struct sk_buff *skb = qdisc->ops->peek(qdisc);
102		if (skb)
103			return skb;
104	}
105	return NULL;
106}
107
108static struct sk_buff *prio_dequeue(struct Qdisc *sch)
109{
110	struct prio_sched_data *q = qdisc_priv(sch);
111	int prio;
112
113	for (prio = 0; prio < q->bands; prio++) {
114		struct Qdisc *qdisc = q->queues[prio];
115		struct sk_buff *skb = qdisc_dequeue_peeked(qdisc);
116		if (skb) {
117			qdisc_bstats_update(sch, skb);
118			sch->q.qlen--;
119			return skb;
120		}
121	}
122	return NULL;
123
124}
125
126static unsigned int prio_drop(struct Qdisc *sch)
127{
128	struct prio_sched_data *q = qdisc_priv(sch);
129	int prio;
130	unsigned int len;
131	struct Qdisc *qdisc;
132
133	for (prio = q->bands-1; prio >= 0; prio--) {
134		qdisc = q->queues[prio];
135		if (qdisc->ops->drop && (len = qdisc->ops->drop(qdisc)) != 0) {
136			sch->q.qlen--;
137			return len;
138		}
139	}
140	return 0;
141}
142
143
144static void
145prio_reset(struct Qdisc *sch)
146{
147	int prio;
148	struct prio_sched_data *q = qdisc_priv(sch);
149
150	for (prio = 0; prio < q->bands; prio++)
151		qdisc_reset(q->queues[prio]);
152	sch->q.qlen = 0;
153}
154
155static void
156prio_destroy(struct Qdisc *sch)
157{
158	int prio;
159	struct prio_sched_data *q = qdisc_priv(sch);
160
161	tcf_destroy_chain(&q->filter_list);
162	for (prio = 0; prio < q->bands; prio++)
163		qdisc_destroy(q->queues[prio]);
164}
165
166static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
167{
168	struct prio_sched_data *q = qdisc_priv(sch);
169	struct tc_prio_qopt *qopt;
170	int i;
171
172	if (nla_len(opt) < sizeof(*qopt))
173		return -EINVAL;
174	qopt = nla_data(opt);
175
176	if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
177		return -EINVAL;
178
179	for (i = 0; i <= TC_PRIO_MAX; i++) {
180		if (qopt->priomap[i] >= qopt->bands)
181			return -EINVAL;
182	}
183
184	sch_tree_lock(sch);
185	q->bands = qopt->bands;
186	memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
187
188	for (i = q->bands; i < TCQ_PRIO_BANDS; i++) {
189		struct Qdisc *child = q->queues[i];
190		q->queues[i] = &noop_qdisc;
191		if (child != &noop_qdisc) {
192			qdisc_tree_decrease_qlen(child, child->q.qlen);
193			qdisc_destroy(child);
194		}
195	}
196	sch_tree_unlock(sch);
197
198	for (i = 0; i < q->bands; i++) {
199		if (q->queues[i] == &noop_qdisc) {
200			struct Qdisc *child, *old;
201
202			child = qdisc_create_dflt(sch->dev_queue,
203						  &pfifo_qdisc_ops,
204						  TC_H_MAKE(sch->handle, i + 1));
205			if (child) {
206				sch_tree_lock(sch);
207				old = q->queues[i];
208				q->queues[i] = child;
209
210				if (old != &noop_qdisc) {
211					qdisc_tree_decrease_qlen(old,
212								 old->q.qlen);
213					qdisc_destroy(old);
214				}
215				sch_tree_unlock(sch);
216			}
217		}
218	}
219	return 0;
220}
221
222static int prio_init(struct Qdisc *sch, struct nlattr *opt)
223{
224	struct prio_sched_data *q = qdisc_priv(sch);
225	int i;
226
227	for (i = 0; i < TCQ_PRIO_BANDS; i++)
228		q->queues[i] = &noop_qdisc;
229
230	if (opt == NULL) {
231		return -EINVAL;
232	} else {
233		int err;
234
235		if ((err = prio_tune(sch, opt)) != 0)
236			return err;
237	}
238	return 0;
239}
240
241static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
242{
243	struct prio_sched_data *q = qdisc_priv(sch);
244	unsigned char *b = skb_tail_pointer(skb);
245	struct tc_prio_qopt opt;
246
247	opt.bands = q->bands;
248	memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
249
250	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
251		goto nla_put_failure;
252
253	return skb->len;
254
255nla_put_failure:
256	nlmsg_trim(skb, b);
257	return -1;
258}
259
260static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
261		      struct Qdisc **old)
262{
263	struct prio_sched_data *q = qdisc_priv(sch);
264	unsigned long band = arg - 1;
265
266	if (new == NULL)
267		new = &noop_qdisc;
268
269	sch_tree_lock(sch);
270	*old = q->queues[band];
271	q->queues[band] = new;
272	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
273	qdisc_reset(*old);
274	sch_tree_unlock(sch);
275
276	return 0;
277}
278
279static struct Qdisc *
280prio_leaf(struct Qdisc *sch, unsigned long arg)
281{
282	struct prio_sched_data *q = qdisc_priv(sch);
283	unsigned long band = arg - 1;
284
285	return q->queues[band];
286}
287
288static unsigned long prio_get(struct Qdisc *sch, u32 classid)
289{
290	struct prio_sched_data *q = qdisc_priv(sch);
291	unsigned long band = TC_H_MIN(classid);
292
293	if (band - 1 >= q->bands)
294		return 0;
295	return band;
296}
297
/*
 * prio_bind - .bind_tcf callback; resolves @classid exactly as prio_get()
 * does. @parent is not used.
 */
static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
{
	return prio_get(sch, classid);
}
302
303
/*
 * prio_put - no-op; installed as both the .put and the .unbind_tcf
 * callback in prio_class_ops.
 */
static void prio_put(struct Qdisc *q, unsigned long cl)
{
}
307
308static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb,
309			   struct tcmsg *tcm)
310{
311	struct prio_sched_data *q = qdisc_priv(sch);
312
313	tcm->tcm_handle |= TC_H_MIN(cl);
314	tcm->tcm_info = q->queues[cl-1]->handle;
315	return 0;
316}
317
318static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
319				 struct gnet_dump *d)
320{
321	struct prio_sched_data *q = qdisc_priv(sch);
322	struct Qdisc *cl_q;
323
324	cl_q = q->queues[cl - 1];
325	cl_q->qstats.qlen = cl_q->q.qlen;
326	if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 ||
327	    gnet_stats_copy_queue(d, &cl_q->qstats) < 0)
328		return -1;
329
330	return 0;
331}
332
333static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
334{
335	struct prio_sched_data *q = qdisc_priv(sch);
336	int prio;
337
338	if (arg->stop)
339		return;
340
341	for (prio = 0; prio < q->bands; prio++) {
342		if (arg->count < arg->skip) {
343			arg->count++;
344			continue;
345		}
346		if (arg->fn(sch, prio + 1, arg) < 0) {
347			arg->stop = 1;
348			break;
349		}
350		arg->count++;
351	}
352}
353
354static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl)
355{
356	struct prio_sched_data *q = qdisc_priv(sch);
357
358	if (cl)
359		return NULL;
360	return &q->filter_list;
361}
362
363static const struct Qdisc_class_ops prio_class_ops = {
364	.graft		=	prio_graft,
365	.leaf		=	prio_leaf,
366	.get		=	prio_get,
367	.put		=	prio_put,
368	.walk		=	prio_walk,
369	.tcf_chain	=	prio_find_tcf,
370	.bind_tcf	=	prio_bind,
371	.unbind_tcf	=	prio_put,
372	.dump		=	prio_dump_class,
373	.dump_stats	=	prio_dump_class_stats,
374};
375
376static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
377	.next		=	NULL,
378	.cl_ops		=	&prio_class_ops,
379	.id		=	"prio",
380	.priv_size	=	sizeof(struct prio_sched_data),
381	.enqueue	=	prio_enqueue,
382	.dequeue	=	prio_dequeue,
383	.peek		=	prio_peek,
384	.drop		=	prio_drop,
385	.init		=	prio_init,
386	.reset		=	prio_reset,
387	.destroy	=	prio_destroy,
388	.change		=	prio_tune,
389	.dump		=	prio_dump,
390	.owner		=	THIS_MODULE,
391};
392
/* Module entry point: registers prio_qdisc_ops and returns register_qdisc()'s result. */
static int __init prio_module_init(void)
{
	return register_qdisc(&prio_qdisc_ops);
}
397
/* Module exit point: unregisters prio_qdisc_ops. */
static void __exit prio_module_exit(void)
{
	unregister_qdisc(&prio_qdisc_ops);
}
402
/* Hook the init/exit functions into the module loader and declare the license. */
module_init(prio_module_init)
module_exit(prio_module_exit)

MODULE_LICENSE("GPL");
407