/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer
   you should just set this qdisc on a device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)
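
   The master device must still be brought up and addressed like any
   other interface before it carries traffic; a minimal sketch (the
   address below is illustrative, not something this module sets):

   # ip link set dev teql0 up
   # ip addr add 10.0.0.1/24 dev teql0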

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb Ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate the upper useful difference at ~10 times.
   3. If the slave requires address resolution, only protocols using
      the neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */

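/* One teql_master exists per teqlN net_device.  The Qdisc_ops is embedded
 * so that each equalizer registers a qdisc type named after its device;
 * the slave qdiscs are kept on a circular list rooted at 'slaves'. */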
struct teql_master {
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	unsigned long	tx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_errors;
	unsigned long	tx_dropped;
};

struct teql_sched_data {
	struct Qdisc *next;
	struct teql_master *m;
	struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)

/* "teql*" qdisc routines */

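/* Tail-drop enqueue: packets are queued on the private list, bounded by
 * the slave device's tx_queue_len; anything beyond that is dropped. */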
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		return NET_XMIT_SUCCESS;
	}

	return qdisc_drop(skb, sch);
}

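/* Dequeue from the private list; when it runs empty, make this qdisc the
 * next transmit candidate and wake the master so it can poll the slaves. */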
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;
	struct Qdisc *q;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	q = rcu_dereference_bh(dat_queue->qdisc);

	if (skb == NULL) {
		struct net_device *m = qdisc_dev(q);

		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	} else {
		qdisc_bstats_update(sch, skb);
	}
	sch->q.qlen = dat->q.qlen + q->q.qlen;
	return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}

static inline void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
}

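/* Unlink this qdisc from its master's circular slave list.  When the last
 * slave goes away, reset the master's own qdisc under its root lock. */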
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	/* Guard against destroy running before init attached us to a
	 * master (e.g. when qdisc creation fails early). */
	if (!master)
		return;

	prev = master->slaves;
	if (prev) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
						spin_lock_bh(root_lock);
						qdisc_reset(rtnl_dereference(txq->qdisc));
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

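/* Attach sch as a slave of the master that owns this qdisc type.  While
 * the master is down, its MTU and BROADCAST/POINTOPOINT/MULTICAST flags
 * shrink to what every slave supports; once it is up, a new slave must
 * already be compatible or the add is refused. */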
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}

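/* Build the link-layer header for skb on the chosen slave via the
 * neighbour cache.  Returns 0 when the header was written, a negative
 * errno when resolution cannot proceed, and 1 when the packet was taken
 * over by the neighbour layer as part of a resolution request. */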
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
	       struct net_device *dev, struct netdev_queue *txq,
	       struct dst_entry *dst)
{
	struct neighbour *n;
	int err = 0;

	n = dst_neigh_lookup_skb(dst, skb);
	if (!n)
		return -ENOENT;

	if (dst->dev != dev) {
		struct neighbour *mn;

		mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
		neigh_release(n);
		if (IS_ERR(mn))
			return PTR_ERR(mn);
		n = mn;
	}

	if (neigh_event_send(n, skb_res) == 0) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, n, dev);
		/* Don't shadow 'err' here: a failed header build must reach
		 * the caller instead of being reported as success. */
		err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
				      NULL, skb->len);

		if (err < 0)
			err = -EINVAL;
		else
			err = 0;
	} else {
		err = (skb_res == NULL) ? -EAGAIN : 1;
	}
	neigh_release(n);
	return err;
}

static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res,
			       struct net_device *dev,
			       struct netdev_queue *txq)
{
	struct dst_entry *dst = skb_dst(skb);
	int res;

	if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
		return -ENODEV;

	if (!dev->header_ops || !dst)
		return 0;

	rcu_read_lock();
	res = __teql_resolve(skb, skb_res, dev, txq, dst);
	rcu_read_unlock();

	return res;
}

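/* Round-robin transmit over the slave list.  Each slave is tried in turn:
 * resolve the destination for that slave, then shoot the packet down its
 * queue.  If every usable slave is busy, the master queue is stopped; if
 * resolution failed everywhere, retry once letting the neighbour layer
 * keep the packet, and drop it if that fails too. */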
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (!q)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
				    netdev_start_xmit(skb, slave, slave_txq, false) ==
				    NETDEV_TX_OK) {
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->tx_packets++;
					master->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	master->tx_errors++;

drop:
	master->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

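/* Bringing the master up only succeeds once it has slaves; recompute the
 * master MTU and FMASK flags from the intersection of slave capabilities. */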
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned int flags = IFF_NOARP | IFF_MULTICAST;

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags&IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags&IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags&IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags&~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
						     struct rtnl_link_stats64 *stats)
{
	struct teql_master *m = netdev_priv(dev);

	stats->tx_packets	= m->tx_packets;
	stats->tx_bytes		= m->tx_bytes;
	stats->tx_errors	= m->tx_errors;
	stats->tx_dropped	= m->tx_dropped;
	return stats;
}

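/* The master MTU may only grow up to the smallest slave MTU, and never
 * shrink below the 68-byte IPv4 minimum. */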
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats64 = teql_master_stats64,
	.ndo_change_mtu	= teql_master_mtu,
};

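/* Per-device setup: wire the embedded Qdisc_ops to the teql routines and
 * give the master device ARP-less, header-agnostic defaults. */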
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size	= sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->peek	=	teql_peek;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->netdev_ops		= &teql_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
	netif_keep_dst(dev);
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

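/* Create max_equalizers teqlN devices and register a matching qdisc type
 * for each.  Partial success is kept: if device i fails, devices 0..i-1
 * stay registered and the module still loads. */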
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
				   NET_NAME_UNKNOWN, teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		err = register_netdev(dev);
		if (err) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
		list_del(&master->master_list);
		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");