veth.c revision 27a242e92f77c955433ce0347533f401ecdcd0f3
1/*
2 *  drivers/net/veth.c
3 *
4 *  Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc
5 *
6 * Author: Pavel Emelianov <xemul@openvz.org>
7 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com>
8 *
9 */
10
11#include <linux/netdevice.h>
12#include <linux/ethtool.h>
13#include <linux/etherdevice.h>
14
15#include <net/dst.h>
16#include <net/xfrm.h>
17#include <linux/veth.h>
18
/*
 * Driver identity: DRV_NAME is reported through ethtool, used as the
 * rtnl link kind, and used as the prefix of auto-generated interface
 * names; DRV_VERSION is reported through ethtool.
 */
#define DRV_NAME	"veth"
#define DRV_VERSION	"1.0"

/* MTU bounds enforced by is_valid_veth_mtu(). */
#define MIN_MTU 68		/* Min L3 MTU */
#define MAX_MTU 65535		/* Max L3 MTU (arbitrary) */
/*
 * Slack added to the peer's MTU when veth_xmit() checks the frame
 * length: the Ethernet header plus 4 bytes (presumably room for a VLAN
 * tag -- TODO confirm).
 */
#define MTU_PAD (ETH_HLEN + 4)  /* Max difference between L2 and L3 size MTU */
25
/*
 * Traffic counters for one veth device on one CPU.
 *
 * veth_dev_init() allocates an instance per CPU, veth_xmit() updates the
 * instance of the CPU it runs on, and veth_get_stats() adds the per-CPU
 * instances up into the device's net_device_stats.
 */
struct veth_net_stats {
	unsigned long	rx_packets;	/* frames delivered to this device */
	unsigned long	tx_packets;	/* frames sent from this device */
	unsigned long	rx_bytes;	/* bytes delivered to this device */
	unsigned long	tx_bytes;	/* bytes sent from this device */
	unsigned long	tx_dropped;	/* sent while the peer was not IFF_UP */
	unsigned long	rx_dropped;	/* longer than this device's MTU + MTU_PAD */
};
34
/*
 * Private area of a veth net_device (see veth_link_ops.priv_size).
 */
struct veth_priv {
	/* Other end of the pair; filled in by veth_newlink(). */
	struct net_device *peer;
	/* Per-CPU counters allocated by veth_dev_init(). */
	struct veth_net_stats *stats;
	/*
	 * Checksum state stamped on frames received by this device when
	 * the sender has NETIF_F_NO_CSUM set (see veth_xmit()); changed
	 * through veth_set_rx_csum().
	 */
	unsigned int ip_summed;
};
40
41/*
42 * ethtool interface
43 */
44
/*
 * Names of the ethtool statistics exported by this driver, one entry per
 * value written by veth_get_ethtool_stats().  Each name occupies a fixed
 * ETH_GSTRING_LEN-byte slot so the whole table can be copied out verbatim
 * by veth_get_strings().
 */
static struct {
	const char string[ETH_GSTRING_LEN];
} ethtool_stats_keys[] = {
	{ "peer_ifindex" },
};
50
51static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
52{
53	cmd->supported		= 0;
54	cmd->advertising	= 0;
55	cmd->speed		= SPEED_10000;
56	cmd->duplex		= DUPLEX_FULL;
57	cmd->port		= PORT_TP;
58	cmd->phy_address	= 0;
59	cmd->transceiver	= XCVR_INTERNAL;
60	cmd->autoneg		= AUTONEG_DISABLE;
61	cmd->maxtxpkt		= 0;
62	cmd->maxrxpkt		= 0;
63	return 0;
64}
65
66static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
67{
68	strcpy(info->driver, DRV_NAME);
69	strcpy(info->version, DRV_VERSION);
70	strcpy(info->fw_version, "N/A");
71}
72
73static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
74{
75	switch(stringset) {
76	case ETH_SS_STATS:
77		memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
78		break;
79	}
80}
81
82static int veth_get_sset_count(struct net_device *dev, int sset)
83{
84	switch (sset) {
85	case ETH_SS_STATS:
86		return ARRAY_SIZE(ethtool_stats_keys);
87	default:
88		return -EOPNOTSUPP;
89	}
90}
91
92static void veth_get_ethtool_stats(struct net_device *dev,
93		struct ethtool_stats *stats, u64 *data)
94{
95	struct veth_priv *priv;
96
97	priv = netdev_priv(dev);
98	data[0] = priv->peer->ifindex;
99}
100
101static u32 veth_get_rx_csum(struct net_device *dev)
102{
103	struct veth_priv *priv;
104
105	priv = netdev_priv(dev);
106	return priv->ip_summed == CHECKSUM_UNNECESSARY;
107}
108
109static int veth_set_rx_csum(struct net_device *dev, u32 data)
110{
111	struct veth_priv *priv;
112
113	priv = netdev_priv(dev);
114	priv->ip_summed = data ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
115	return 0;
116}
117
118static u32 veth_get_tx_csum(struct net_device *dev)
119{
120	return (dev->features & NETIF_F_NO_CSUM) != 0;
121}
122
123static int veth_set_tx_csum(struct net_device *dev, u32 data)
124{
125	if (data)
126		dev->features |= NETIF_F_NO_CSUM;
127	else
128		dev->features &= ~NETIF_F_NO_CSUM;
129	return 0;
130}
131
132static struct ethtool_ops veth_ethtool_ops = {
133	.get_settings		= veth_get_settings,
134	.get_drvinfo		= veth_get_drvinfo,
135	.get_link		= ethtool_op_get_link,
136	.get_rx_csum		= veth_get_rx_csum,
137	.set_rx_csum		= veth_set_rx_csum,
138	.get_tx_csum		= veth_get_tx_csum,
139	.set_tx_csum		= veth_set_tx_csum,
140	.get_sg			= ethtool_op_get_sg,
141	.set_sg			= ethtool_op_set_sg,
142	.get_strings		= veth_get_strings,
143	.get_sset_count		= veth_get_sset_count,
144	.get_ethtool_stats	= veth_get_ethtool_stats,
145};
146
147/*
148 * xmit
149 */
150
151static int veth_xmit(struct sk_buff *skb, struct net_device *dev)
152{
153	struct net_device *rcv = NULL;
154	struct veth_priv *priv, *rcv_priv;
155	struct veth_net_stats *stats, *rcv_stats;
156	int length, cpu;
157
158	skb_orphan(skb);
159
160	priv = netdev_priv(dev);
161	rcv = priv->peer;
162	rcv_priv = netdev_priv(rcv);
163
164	cpu = smp_processor_id();
165	stats = per_cpu_ptr(priv->stats, cpu);
166	rcv_stats = per_cpu_ptr(rcv_priv->stats, cpu);
167
168	if (!(rcv->flags & IFF_UP))
169		goto tx_drop;
170
171	if (skb->len > (rcv->mtu + MTU_PAD))
172		goto rx_drop;
173
174        skb->tstamp.tv64 = 0;
175	skb->pkt_type = PACKET_HOST;
176	skb->protocol = eth_type_trans(skb, rcv);
177	if (dev->features & NETIF_F_NO_CSUM)
178		skb->ip_summed = rcv_priv->ip_summed;
179
180	skb->mark = 0;
181	secpath_reset(skb);
182	nf_reset(skb);
183
184	length = skb->len;
185
186	stats->tx_bytes += length;
187	stats->tx_packets++;
188
189	rcv_stats->rx_bytes += length;
190	rcv_stats->rx_packets++;
191
192	netif_rx(skb);
193	return NETDEV_TX_OK;
194
195tx_drop:
196	kfree_skb(skb);
197	stats->tx_dropped++;
198	return NETDEV_TX_OK;
199
200rx_drop:
201	kfree_skb(skb);
202	rcv_stats->rx_dropped++;
203	return NETDEV_TX_OK;
204}
205
206/*
207 * general routines
208 */
209
210static struct net_device_stats *veth_get_stats(struct net_device *dev)
211{
212	struct veth_priv *priv;
213	struct net_device_stats *dev_stats;
214	int cpu;
215	struct veth_net_stats *stats;
216
217	priv = netdev_priv(dev);
218	dev_stats = &dev->stats;
219
220	dev_stats->rx_packets = 0;
221	dev_stats->tx_packets = 0;
222	dev_stats->rx_bytes = 0;
223	dev_stats->tx_bytes = 0;
224	dev_stats->tx_dropped = 0;
225	dev_stats->rx_dropped = 0;
226
227	for_each_online_cpu(cpu) {
228		stats = per_cpu_ptr(priv->stats, cpu);
229
230		dev_stats->rx_packets += stats->rx_packets;
231		dev_stats->tx_packets += stats->tx_packets;
232		dev_stats->rx_bytes += stats->rx_bytes;
233		dev_stats->tx_bytes += stats->tx_bytes;
234		dev_stats->tx_dropped += stats->tx_dropped;
235		dev_stats->rx_dropped += stats->rx_dropped;
236	}
237
238	return dev_stats;
239}
240
241static int veth_open(struct net_device *dev)
242{
243	struct veth_priv *priv;
244
245	priv = netdev_priv(dev);
246	if (priv->peer == NULL)
247		return -ENOTCONN;
248
249	if (priv->peer->flags & IFF_UP) {
250		netif_carrier_on(dev);
251		netif_carrier_on(priv->peer);
252	}
253	return 0;
254}
255
256static int veth_close(struct net_device *dev)
257{
258	struct veth_priv *priv = netdev_priv(dev);
259
260	netif_carrier_off(dev);
261	netif_carrier_off(priv->peer);
262
263	return 0;
264}
265
266static int is_valid_veth_mtu(int new_mtu)
267{
268	return (new_mtu >= MIN_MTU && new_mtu <= MAX_MTU);
269}
270
271static int veth_change_mtu(struct net_device *dev, int new_mtu)
272{
273	if (!is_valid_veth_mtu(new_mtu))
274		return -EINVAL;
275	dev->mtu = new_mtu;
276	return 0;
277}
278
279static int veth_dev_init(struct net_device *dev)
280{
281	struct veth_net_stats *stats;
282	struct veth_priv *priv;
283
284	stats = alloc_percpu(struct veth_net_stats);
285	if (stats == NULL)
286		return -ENOMEM;
287
288	priv = netdev_priv(dev);
289	priv->stats = stats;
290	return 0;
291}
292
293static void veth_dev_free(struct net_device *dev)
294{
295	struct veth_priv *priv;
296
297	priv = netdev_priv(dev);
298	free_percpu(priv->stats);
299	free_netdev(dev);
300}
301
302static const struct net_device_ops veth_netdev_ops = {
303	.ndo_init            = veth_dev_init,
304	.ndo_open            = veth_open,
305	.ndo_stop            = veth_close,
306	.ndo_start_xmit      = veth_xmit,
307	.ndo_change_mtu      = veth_change_mtu,
308	.ndo_get_stats       = veth_get_stats,
309	.ndo_set_mac_address = eth_mac_addr,
310};
311
312static void veth_setup(struct net_device *dev)
313{
314	ether_setup(dev);
315
316	dev->netdev_ops = &veth_netdev_ops;
317	dev->ethtool_ops = &veth_ethtool_ops;
318	dev->features |= NETIF_F_LLTX;
319	dev->destructor = veth_dev_free;
320}
321
322/*
323 * netlink interface
324 */
325
326static int veth_validate(struct nlattr *tb[], struct nlattr *data[])
327{
328	if (tb[IFLA_ADDRESS]) {
329		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
330			return -EINVAL;
331		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
332			return -EADDRNOTAVAIL;
333	}
334	if (tb[IFLA_MTU]) {
335		if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU])))
336			return -EINVAL;
337	}
338	return 0;
339}
340
/*
 * Forward declaration: veth_newlink() passes these ops to
 * rtnl_create_link() when it creates the peer, while the definition
 * itself refers back to veth_newlink().
 */
static struct rtnl_link_ops veth_link_ops;
342
343static int veth_newlink(struct net_device *dev,
344			 struct nlattr *tb[], struct nlattr *data[])
345{
346	int err;
347	struct net_device *peer;
348	struct veth_priv *priv;
349	char ifname[IFNAMSIZ];
350	struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
351
352	/*
353	 * create and register peer first
354	 *
355	 * struct ifinfomsg is at the head of VETH_INFO_PEER, but we
356	 * skip it since no info from it is useful yet
357	 */
358
359	if (data != NULL && data[VETH_INFO_PEER] != NULL) {
360		struct nlattr *nla_peer;
361
362		nla_peer = data[VETH_INFO_PEER];
363		err = nla_parse(peer_tb, IFLA_MAX,
364				nla_data(nla_peer) + sizeof(struct ifinfomsg),
365				nla_len(nla_peer) - sizeof(struct ifinfomsg),
366				ifla_policy);
367		if (err < 0)
368			return err;
369
370		err = veth_validate(peer_tb, NULL);
371		if (err < 0)
372			return err;
373
374		tbp = peer_tb;
375	} else
376		tbp = tb;
377
378	if (tbp[IFLA_IFNAME])
379		nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
380	else
381		snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
382
383	peer = rtnl_create_link(dev_net(dev), ifname, &veth_link_ops, tbp);
384	if (IS_ERR(peer))
385		return PTR_ERR(peer);
386
387	if (tbp[IFLA_ADDRESS] == NULL)
388		random_ether_addr(peer->dev_addr);
389
390	err = register_netdevice(peer);
391	if (err < 0)
392		goto err_register_peer;
393
394	netif_carrier_off(peer);
395
396	/*
397	 * register dev last
398	 *
399	 * note, that since we've registered new device the dev's name
400	 * should be re-allocated
401	 */
402
403	if (tb[IFLA_ADDRESS] == NULL)
404		random_ether_addr(dev->dev_addr);
405
406	if (tb[IFLA_IFNAME])
407		nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
408	else
409		snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
410
411	if (strchr(dev->name, '%')) {
412		err = dev_alloc_name(dev, dev->name);
413		if (err < 0)
414			goto err_alloc_name;
415	}
416
417	err = register_netdevice(dev);
418	if (err < 0)
419		goto err_register_dev;
420
421	netif_carrier_off(dev);
422
423	/*
424	 * tie the deviced together
425	 */
426
427	priv = netdev_priv(dev);
428	priv->peer = peer;
429
430	priv = netdev_priv(peer);
431	priv->peer = dev;
432	return 0;
433
434err_register_dev:
435	/* nothing to do */
436err_alloc_name:
437	unregister_netdevice(peer);
438	return err;
439
440err_register_peer:
441	free_netdev(peer);
442	return err;
443}
444
445static void veth_dellink(struct net_device *dev)
446{
447	struct veth_priv *priv;
448	struct net_device *peer;
449
450	priv = netdev_priv(dev);
451	peer = priv->peer;
452
453	unregister_netdevice(dev);
454	unregister_netdevice(peer);
455}
456
457static const struct nla_policy veth_policy[VETH_INFO_MAX + 1];
458
459static struct rtnl_link_ops veth_link_ops = {
460	.kind		= DRV_NAME,
461	.priv_size	= sizeof(struct veth_priv),
462	.setup		= veth_setup,
463	.validate	= veth_validate,
464	.newlink	= veth_newlink,
465	.dellink	= veth_dellink,
466	.policy		= veth_policy,
467	.maxtype	= VETH_INFO_MAX,
468};
469
470/*
471 * init/fini
472 */
473
474static __init int veth_init(void)
475{
476	return rtnl_link_register(&veth_link_ops);
477}
478
479static __exit void veth_exit(void)
480{
481	rtnl_link_unregister(&veth_link_ops);
482}
483
/* Hook veth_init()/veth_exit() up as the module load/unload handlers. */
module_init(veth_init);
module_exit(veth_exit);

/* Module metadata; the rtnl-link alias is derived from DRV_NAME. */
MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_RTNL_LINK(DRV_NAME);
490