veth.c revision 8ce120f11898c921329a5f618d01dcc1e8e69cac
1/*
2 *  drivers/net/veth.c
3 *
4 *  Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc
5 *
6 * Author: Pavel Emelianov <xemul@openvz.org>
7 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com>
8 *
9 */
10
11#include <linux/netdevice.h>
12#include <linux/slab.h>
13#include <linux/ethtool.h>
14#include <linux/etherdevice.h>
15#include <linux/u64_stats_sync.h>
16
17#include <net/dst.h>
18#include <net/xfrm.h>
19#include <linux/veth.h>
20#include <linux/module.h>
21
/* Driver name and version strings. */
#define DRV_NAME	"veth"
#define DRV_VERSION	"1.0"

/* Inclusive MTU bounds enforced by is_valid_veth_mtu(). */
#define MIN_MTU 68		/* Min L3 MTU */
#define MAX_MTU 65535		/* Max L3 MTU (arbitrary) */
27
28struct veth_net_stats {
29	u64			rx_packets;
30	u64			rx_bytes;
31	u64			tx_packets;
32	u64			tx_bytes;
33	u64			rx_dropped;
34	struct u64_stats_sync	syncp;
35};
36
37struct veth_priv {
38	struct net_device *peer;
39	struct veth_net_stats __percpu *stats;
40};
41
42/*
43 * ethtool interface
44 */
45
46static struct {
47	const char string[ETH_GSTRING_LEN];
48} ethtool_stats_keys[] = {
49	{ "peer_ifindex" },
50};
51
52static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
53{
54	cmd->supported		= 0;
55	cmd->advertising	= 0;
56	ethtool_cmd_speed_set(cmd, SPEED_10000);
57	cmd->duplex		= DUPLEX_FULL;
58	cmd->port		= PORT_TP;
59	cmd->phy_address	= 0;
60	cmd->transceiver	= XCVR_INTERNAL;
61	cmd->autoneg		= AUTONEG_DISABLE;
62	cmd->maxtxpkt		= 0;
63	cmd->maxrxpkt		= 0;
64	return 0;
65}
66
67static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
68{
69	strcpy(info->driver, DRV_NAME);
70	strcpy(info->version, DRV_VERSION);
71	strcpy(info->fw_version, "N/A");
72}
73
74static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
75{
76	switch(stringset) {
77	case ETH_SS_STATS:
78		memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
79		break;
80	}
81}
82
83static int veth_get_sset_count(struct net_device *dev, int sset)
84{
85	switch (sset) {
86	case ETH_SS_STATS:
87		return ARRAY_SIZE(ethtool_stats_keys);
88	default:
89		return -EOPNOTSUPP;
90	}
91}
92
93static void veth_get_ethtool_stats(struct net_device *dev,
94		struct ethtool_stats *stats, u64 *data)
95{
96	struct veth_priv *priv;
97
98	priv = netdev_priv(dev);
99	data[0] = priv->peer->ifindex;
100}
101
102static const struct ethtool_ops veth_ethtool_ops = {
103	.get_settings		= veth_get_settings,
104	.get_drvinfo		= veth_get_drvinfo,
105	.get_link		= ethtool_op_get_link,
106	.get_strings		= veth_get_strings,
107	.get_sset_count		= veth_get_sset_count,
108	.get_ethtool_stats	= veth_get_ethtool_stats,
109};
110
111/*
112 * xmit
113 */
114
115static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
116{
117	struct net_device *rcv = NULL;
118	struct veth_priv *priv, *rcv_priv;
119	struct veth_net_stats *stats, *rcv_stats;
120	int length;
121
122	priv = netdev_priv(dev);
123	rcv = priv->peer;
124	rcv_priv = netdev_priv(rcv);
125
126	stats = this_cpu_ptr(priv->stats);
127	rcv_stats = this_cpu_ptr(rcv_priv->stats);
128
129	/* don't change ip_summed == CHECKSUM_PARTIAL, as that
130	   will cause bad checksum on forwarded packets */
131	if (skb->ip_summed == CHECKSUM_NONE &&
132	    rcv->features & NETIF_F_RXCSUM)
133		skb->ip_summed = CHECKSUM_UNNECESSARY;
134
135	length = skb->len;
136	if (dev_forward_skb(rcv, skb) != NET_RX_SUCCESS)
137		goto rx_drop;
138
139	u64_stats_update_begin(&stats->syncp);
140	stats->tx_bytes += length;
141	stats->tx_packets++;
142	u64_stats_update_end(&stats->syncp);
143
144	u64_stats_update_begin(&rcv_stats->syncp);
145	rcv_stats->rx_bytes += length;
146	rcv_stats->rx_packets++;
147	u64_stats_update_end(&rcv_stats->syncp);
148
149	return NETDEV_TX_OK;
150
151rx_drop:
152	u64_stats_update_begin(&rcv_stats->syncp);
153	rcv_stats->rx_dropped++;
154	u64_stats_update_end(&rcv_stats->syncp);
155	return NETDEV_TX_OK;
156}
157
158/*
159 * general routines
160 */
161
162static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev,
163						  struct rtnl_link_stats64 *tot)
164{
165	struct veth_priv *priv = netdev_priv(dev);
166	int cpu;
167
168	for_each_possible_cpu(cpu) {
169		struct veth_net_stats *stats = per_cpu_ptr(priv->stats, cpu);
170		u64 rx_packets, rx_bytes, rx_dropped;
171		u64 tx_packets, tx_bytes;
172		unsigned int start;
173
174		do {
175			start = u64_stats_fetch_begin_bh(&stats->syncp);
176			rx_packets = stats->rx_packets;
177			tx_packets = stats->tx_packets;
178			rx_bytes = stats->rx_bytes;
179			tx_bytes = stats->tx_bytes;
180			rx_dropped = stats->rx_dropped;
181		} while (u64_stats_fetch_retry_bh(&stats->syncp, start));
182		tot->rx_packets += rx_packets;
183		tot->tx_packets += tx_packets;
184		tot->rx_bytes   += rx_bytes;
185		tot->tx_bytes   += tx_bytes;
186		tot->rx_dropped += rx_dropped;
187	}
188
189	return tot;
190}
191
192static int veth_open(struct net_device *dev)
193{
194	struct veth_priv *priv;
195
196	priv = netdev_priv(dev);
197	if (priv->peer == NULL)
198		return -ENOTCONN;
199
200	if (priv->peer->flags & IFF_UP) {
201		netif_carrier_on(dev);
202		netif_carrier_on(priv->peer);
203	}
204	return 0;
205}
206
207static int veth_close(struct net_device *dev)
208{
209	struct veth_priv *priv = netdev_priv(dev);
210
211	netif_carrier_off(dev);
212	netif_carrier_off(priv->peer);
213
214	return 0;
215}
216
217static int is_valid_veth_mtu(int new_mtu)
218{
219	return new_mtu >= MIN_MTU && new_mtu <= MAX_MTU;
220}
221
222static int veth_change_mtu(struct net_device *dev, int new_mtu)
223{
224	if (!is_valid_veth_mtu(new_mtu))
225		return -EINVAL;
226	dev->mtu = new_mtu;
227	return 0;
228}
229
230static int veth_dev_init(struct net_device *dev)
231{
232	struct veth_net_stats __percpu *stats;
233	struct veth_priv *priv;
234
235	stats = alloc_percpu(struct veth_net_stats);
236	if (stats == NULL)
237		return -ENOMEM;
238
239	priv = netdev_priv(dev);
240	priv->stats = stats;
241	return 0;
242}
243
244static void veth_dev_free(struct net_device *dev)
245{
246	struct veth_priv *priv;
247
248	priv = netdev_priv(dev);
249	free_percpu(priv->stats);
250	free_netdev(dev);
251}
252
253static const struct net_device_ops veth_netdev_ops = {
254	.ndo_init            = veth_dev_init,
255	.ndo_open            = veth_open,
256	.ndo_stop            = veth_close,
257	.ndo_start_xmit      = veth_xmit,
258	.ndo_change_mtu      = veth_change_mtu,
259	.ndo_get_stats64     = veth_get_stats64,
260	.ndo_set_mac_address = eth_mac_addr,
261};
262
263static void veth_setup(struct net_device *dev)
264{
265	ether_setup(dev);
266
267	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
268
269	dev->netdev_ops = &veth_netdev_ops;
270	dev->ethtool_ops = &veth_ethtool_ops;
271	dev->features |= NETIF_F_LLTX;
272	dev->destructor = veth_dev_free;
273
274	dev->hw_features = NETIF_F_NO_CSUM | NETIF_F_SG | NETIF_F_RXCSUM;
275}
276
277/*
278 * netlink interface
279 */
280
281static int veth_validate(struct nlattr *tb[], struct nlattr *data[])
282{
283	if (tb[IFLA_ADDRESS]) {
284		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
285			return -EINVAL;
286		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
287			return -EADDRNOTAVAIL;
288	}
289	if (tb[IFLA_MTU]) {
290		if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU])))
291			return -EINVAL;
292	}
293	return 0;
294}
295
296static struct rtnl_link_ops veth_link_ops;
297
298static int veth_newlink(struct net *src_net, struct net_device *dev,
299			 struct nlattr *tb[], struct nlattr *data[])
300{
301	int err;
302	struct net_device *peer;
303	struct veth_priv *priv;
304	char ifname[IFNAMSIZ];
305	struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
306	struct ifinfomsg *ifmp;
307	struct net *net;
308
309	/*
310	 * create and register peer first
311	 */
312	if (data != NULL && data[VETH_INFO_PEER] != NULL) {
313		struct nlattr *nla_peer;
314
315		nla_peer = data[VETH_INFO_PEER];
316		ifmp = nla_data(nla_peer);
317		err = nla_parse(peer_tb, IFLA_MAX,
318				nla_data(nla_peer) + sizeof(struct ifinfomsg),
319				nla_len(nla_peer) - sizeof(struct ifinfomsg),
320				ifla_policy);
321		if (err < 0)
322			return err;
323
324		err = veth_validate(peer_tb, NULL);
325		if (err < 0)
326			return err;
327
328		tbp = peer_tb;
329	} else {
330		ifmp = NULL;
331		tbp = tb;
332	}
333
334	if (tbp[IFLA_IFNAME])
335		nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
336	else
337		snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
338
339	net = rtnl_link_get_net(src_net, tbp);
340	if (IS_ERR(net))
341		return PTR_ERR(net);
342
343	peer = rtnl_create_link(src_net, net, ifname, &veth_link_ops, tbp);
344	if (IS_ERR(peer)) {
345		put_net(net);
346		return PTR_ERR(peer);
347	}
348
349	if (tbp[IFLA_ADDRESS] == NULL)
350		random_ether_addr(peer->dev_addr);
351
352	err = register_netdevice(peer);
353	put_net(net);
354	net = NULL;
355	if (err < 0)
356		goto err_register_peer;
357
358	netif_carrier_off(peer);
359
360	err = rtnl_configure_link(peer, ifmp);
361	if (err < 0)
362		goto err_configure_peer;
363
364	/*
365	 * register dev last
366	 *
367	 * note, that since we've registered new device the dev's name
368	 * should be re-allocated
369	 */
370
371	if (tb[IFLA_ADDRESS] == NULL)
372		random_ether_addr(dev->dev_addr);
373
374	if (tb[IFLA_IFNAME])
375		nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
376	else
377		snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
378
379	if (strchr(dev->name, '%')) {
380		err = dev_alloc_name(dev, dev->name);
381		if (err < 0)
382			goto err_alloc_name;
383	}
384
385	err = register_netdevice(dev);
386	if (err < 0)
387		goto err_register_dev;
388
389	netif_carrier_off(dev);
390
391	/*
392	 * tie the deviced together
393	 */
394
395	priv = netdev_priv(dev);
396	priv->peer = peer;
397
398	priv = netdev_priv(peer);
399	priv->peer = dev;
400	return 0;
401
402err_register_dev:
403	/* nothing to do */
404err_alloc_name:
405err_configure_peer:
406	unregister_netdevice(peer);
407	return err;
408
409err_register_peer:
410	free_netdev(peer);
411	return err;
412}
413
414static void veth_dellink(struct net_device *dev, struct list_head *head)
415{
416	struct veth_priv *priv;
417	struct net_device *peer;
418
419	priv = netdev_priv(dev);
420	peer = priv->peer;
421
422	unregister_netdevice_queue(dev, head);
423	unregister_netdevice_queue(peer, head);
424}
425
426static const struct nla_policy veth_policy[VETH_INFO_MAX + 1];
427
428static struct rtnl_link_ops veth_link_ops = {
429	.kind		= DRV_NAME,
430	.priv_size	= sizeof(struct veth_priv),
431	.setup		= veth_setup,
432	.validate	= veth_validate,
433	.newlink	= veth_newlink,
434	.dellink	= veth_dellink,
435	.policy		= veth_policy,
436	.maxtype	= VETH_INFO_MAX,
437};
438
439/*
440 * init/fini
441 */
442
443static __init int veth_init(void)
444{
445	return rtnl_link_register(&veth_link_ops);
446}
447
448static __exit void veth_exit(void)
449{
450	rtnl_link_unregister(&veth_link_ops);
451}
452
453module_init(veth_init);
454module_exit(veth_exit);
455
456MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
457MODULE_LICENSE("GPL v2");
458MODULE_ALIAS_RTNL_LINK(DRV_NAME);
459