1/*
2 *	Linux NET3:	GRE over IP protocol decoder.
3 *
4 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 *	This program is free software; you can redistribute it and/or
7 *	modify it under the terms of the GNU General Public License
8 *	as published by the Free Software Foundation; either version
9 *	2 of the License, or (at your option) any later version.
10 *
11 */
12
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15#include <linux/capability.h>
16#include <linux/module.h>
17#include <linux/types.h>
18#include <linux/kernel.h>
19#include <linux/slab.h>
20#include <asm/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
27#include <linux/mroute.h>
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
33#include <linux/etherdevice.h>
34#include <linux/if_ether.h>
35
36#include <net/sock.h>
37#include <net/ip.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
40#include <net/ip_tunnels.h>
41#include <net/arp.h>
42#include <net/checksum.h>
43#include <net/dsfield.h>
44#include <net/inet_ecn.h>
45#include <net/xfrm.h>
46#include <net/net_namespace.h>
47#include <net/netns/generic.h>
48#include <net/rtnetlink.h>
49#include <net/gre.h>
50
51#if IS_ENABLED(CONFIG_IPV6)
52#include <net/ipv6.h>
53#include <net/ip6_fib.h>
54#include <net/ip6_route.h>
55#endif
56
57/*
58   Problems & solutions
59   --------------------
60
61   1. The most important issue is detecting local dead loops.
62   They would cause complete host lockup in transmit, which
63   would be "resolved" by stack overflow or, if queueing is enabled,
64   with infinite looping in net_bh.
65
66   We cannot track such dead loops during route installation,
67   it is infeasible task. The most general solutions would be
68   to keep skb->encapsulation counter (sort of local ttl),
69   and silently drop packet when it expires. It is a good
70   solution, but it supposes maintaining new variable in ALL
71   skb, even if no tunneling is used.
72
73   Current solution: xmit_recursion breaks dead loops. This is a percpu
74   counter, since when we enter the first ndo_xmit(), cpu migration is
75   forbidden. We force an exit if this counter reaches RECURSION_LIMIT
76
77   2. Networking dead loops would not kill routers, but would really
78   kill network. IP hop limit plays role of "t->recursion" in this case,
79   if we copy it from packet being encapsulated to upper header.
80   It is very good solution, but it introduces two problems:
81
82   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
83     do not work over tunnels.
84   - traceroute does not work. I planned to relay ICMP from tunnel,
85     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
87     only Linux complies to rfc1812 now (yes, guys, Linux is the only
88     true router now :-)), all routers (at least, in neighbourhood of mine)
89     return only 8 bytes of payload. It is the end.
90
91   Hence, if we want that OSPF worked or traceroute said something reasonable,
92   we should search for another solution.
93
94   One of them is to parse packet trying to detect inner encapsulation
95   made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, ttl is not a solution at all.
97
98   Current solution: The solution was UNEXPECTEDLY SIMPLE.
99   We force DF flag on tunnels with preconfigured hop limit,
100   that is ALL. :-) Well, it does not remove the problem completely,
101   but exponential growth of network traffic is changed to linear
102   (branches, that exceed pmtu are pruned) and tunnel mtu
103   rapidly degrades to value <68, where looping stops.
104   Yes, it is not good if there exists a router in the loop,
105   which does not force DF, even when encapsulating packets have DF set.
106   But it is not our problem! Nobody could accuse us, we made
107   all that we could make. Even if it is your gated who injected
108   fatal route to network, even if it were you who configured
109   fatal static route: you are innocent. :-)
110
111   Alexey Kuznetsov.
112 */
113
114static bool log_ecn_error = true;
115module_param(log_ecn_error, bool, 0644);
116MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
117
118static struct rtnl_link_ops ipgre_link_ops __read_mostly;
119static int ipgre_tunnel_init(struct net_device *dev);
120
121static int ipgre_net_id __read_mostly;
122static int gre_tap_net_id __read_mostly;
123
124static int ipgre_err(struct sk_buff *skb, u32 info,
125		     const struct tnl_ptk_info *tpi)
126{
127
128	/* All the routers (except for Linux) return only
129	   8 bytes of packet payload. It means, that precise relaying of
130	   ICMP in the real Internet is absolutely infeasible.
131
132	   Moreover, Cisco "wise men" put GRE key to the third word
133	   in GRE header. It makes impossible maintaining even soft
134	   state for keyed GRE tunnels with enabled checksum. Tell
135	   them "thank you".
136
137	   Well, I wonder, rfc1812 was written by Cisco employee,
138	   what the hell these idiots break standards established
139	   by themselves???
140	   */
141	struct net *net = dev_net(skb->dev);
142	struct ip_tunnel_net *itn;
143	const struct iphdr *iph;
144	const int type = icmp_hdr(skb)->type;
145	const int code = icmp_hdr(skb)->code;
146	struct ip_tunnel *t;
147
148	switch (type) {
149	default:
150	case ICMP_PARAMETERPROB:
151		return PACKET_RCVD;
152
153	case ICMP_DEST_UNREACH:
154		switch (code) {
155		case ICMP_SR_FAILED:
156		case ICMP_PORT_UNREACH:
157			/* Impossible event. */
158			return PACKET_RCVD;
159		default:
160			/* All others are translated to HOST_UNREACH.
161			   rfc2003 contains "deep thoughts" about NET_UNREACH,
162			   I believe they are just ether pollution. --ANK
163			 */
164			break;
165		}
166		break;
167	case ICMP_TIME_EXCEEDED:
168		if (code != ICMP_EXC_TTL)
169			return PACKET_RCVD;
170		break;
171
172	case ICMP_REDIRECT:
173		break;
174	}
175
176	if (tpi->proto == htons(ETH_P_TEB))
177		itn = net_generic(net, gre_tap_net_id);
178	else
179		itn = net_generic(net, ipgre_net_id);
180
181	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
182	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
183			     iph->daddr, iph->saddr, tpi->key);
184
185	if (t == NULL)
186		return PACKET_REJECT;
187
188	if (t->parms.iph.daddr == 0 ||
189	    ipv4_is_multicast(t->parms.iph.daddr))
190		return PACKET_RCVD;
191
192	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
193		return PACKET_RCVD;
194
195	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
196		t->err_count++;
197	else
198		t->err_count = 1;
199	t->err_time = jiffies;
200	return PACKET_RCVD;
201}
202
203static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
204{
205	struct net *net = dev_net(skb->dev);
206	struct ip_tunnel_net *itn;
207	const struct iphdr *iph;
208	struct ip_tunnel *tunnel;
209
210	if (tpi->proto == htons(ETH_P_TEB))
211		itn = net_generic(net, gre_tap_net_id);
212	else
213		itn = net_generic(net, ipgre_net_id);
214
215	iph = ip_hdr(skb);
216	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
217				  iph->saddr, iph->daddr, tpi->key);
218
219	if (tunnel) {
220		skb_pop_mac_header(skb);
221		ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
222		return PACKET_RCVD;
223	}
224	return PACKET_REJECT;
225}
226
227static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
228		       const struct iphdr *tnl_params,
229		       __be16 proto)
230{
231	struct ip_tunnel *tunnel = netdev_priv(dev);
232	struct tnl_ptk_info tpi;
233
234	tpi.flags = tunnel->parms.o_flags;
235	tpi.proto = proto;
236	tpi.key = tunnel->parms.o_key;
237	if (tunnel->parms.o_flags & TUNNEL_SEQ)
238		tunnel->o_seqno++;
239	tpi.seq = htonl(tunnel->o_seqno);
240
241	/* Push GRE header. */
242	gre_build_header(skb, &tpi, tunnel->tun_hlen);
243
244	skb_set_inner_protocol(skb, tpi.proto);
245
246	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
247}
248
249static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
250			      struct net_device *dev)
251{
252	struct ip_tunnel *tunnel = netdev_priv(dev);
253	const struct iphdr *tnl_params;
254
255	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
256	if (IS_ERR(skb))
257		goto out;
258
259	if (dev->header_ops) {
260		/* Need space for new headers */
261		if (skb_cow_head(skb, dev->needed_headroom -
262				      (tunnel->hlen + sizeof(struct iphdr))))
263			goto free_skb;
264
265		tnl_params = (const struct iphdr *)skb->data;
266
267		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
268		 * to gre header.
269		 */
270		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
271	} else {
272		if (skb_cow_head(skb, dev->needed_headroom))
273			goto free_skb;
274
275		tnl_params = &tunnel->parms.iph;
276	}
277
278	__gre_xmit(skb, dev, tnl_params, skb->protocol);
279
280	return NETDEV_TX_OK;
281
282free_skb:
283	kfree_skb(skb);
284out:
285	dev->stats.tx_dropped++;
286	return NETDEV_TX_OK;
287}
288
289static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
290				struct net_device *dev)
291{
292	struct ip_tunnel *tunnel = netdev_priv(dev);
293
294	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
295	if (IS_ERR(skb))
296		goto out;
297
298	if (skb_cow_head(skb, dev->needed_headroom))
299		goto free_skb;
300
301	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
302
303	return NETDEV_TX_OK;
304
305free_skb:
306	kfree_skb(skb);
307out:
308	dev->stats.tx_dropped++;
309	return NETDEV_TX_OK;
310}
311
312static int ipgre_tunnel_ioctl(struct net_device *dev,
313			      struct ifreq *ifr, int cmd)
314{
315	int err;
316	struct ip_tunnel_parm p;
317
318	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
319		return -EFAULT;
320	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
321		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
322		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
323		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
324			return -EINVAL;
325	}
326	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
327	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
328
329	err = ip_tunnel_ioctl(dev, &p, cmd);
330	if (err)
331		return err;
332
333	p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
334	p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
335
336	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
337		return -EFAULT;
338	return 0;
339}
340
341/* Nice toy. Unfortunately, useless in real life :-)
342   It allows to construct virtual multiprotocol broadcast "LAN"
343   over the Internet, provided multicast routing is tuned.
344
345
346   I have no idea was this bicycle invented before me,
347   so that I had to set ARPHRD_IPGRE to a random value.
348   I have an impression, that Cisco could make something similar,
349   but this feature is apparently missing in IOS<=11.2(8).
350
351   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
352   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
353
354   ping -t 255 224.66.66.66
355
356   If nobody answers, mbone does not work.
357
358   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
359   ip addr add 10.66.66.<somewhat>/24 dev Universe
360   ifconfig Universe up
361   ifconfig Universe add fe80::<Your_real_addr>/10
362   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
363   ftp 10.66.66.66
364   ...
365   ftp fec0:6666:6666::193.233.7.65
366   ...
367 */
368static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
369			unsigned short type,
370			const void *daddr, const void *saddr, unsigned int len)
371{
372	struct ip_tunnel *t = netdev_priv(dev);
373	struct iphdr *iph;
374	struct gre_base_hdr *greh;
375
376	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
377	greh = (struct gre_base_hdr *)(iph+1);
378	greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
379	greh->protocol = htons(type);
380
381	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
382
383	/* Set the source hardware address. */
384	if (saddr)
385		memcpy(&iph->saddr, saddr, 4);
386	if (daddr)
387		memcpy(&iph->daddr, daddr, 4);
388	if (iph->daddr)
389		return t->hlen + sizeof(*iph);
390
391	return -(t->hlen + sizeof(*iph));
392}
393
394static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
395{
396	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
397	memcpy(haddr, &iph->saddr, 4);
398	return 4;
399}
400
401static const struct header_ops ipgre_header_ops = {
402	.create	= ipgre_header,
403	.parse	= ipgre_header_parse,
404};
405
406#ifdef CONFIG_NET_IPGRE_BROADCAST
407static int ipgre_open(struct net_device *dev)
408{
409	struct ip_tunnel *t = netdev_priv(dev);
410
411	if (ipv4_is_multicast(t->parms.iph.daddr)) {
412		struct flowi4 fl4;
413		struct rtable *rt;
414
415		rt = ip_route_output_gre(t->net, &fl4,
416					 t->parms.iph.daddr,
417					 t->parms.iph.saddr,
418					 t->parms.o_key,
419					 RT_TOS(t->parms.iph.tos),
420					 t->parms.link);
421		if (IS_ERR(rt))
422			return -EADDRNOTAVAIL;
423		dev = rt->dst.dev;
424		ip_rt_put(rt);
425		if (__in_dev_get_rtnl(dev) == NULL)
426			return -EADDRNOTAVAIL;
427		t->mlink = dev->ifindex;
428		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
429	}
430	return 0;
431}
432
433static int ipgre_close(struct net_device *dev)
434{
435	struct ip_tunnel *t = netdev_priv(dev);
436
437	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
438		struct in_device *in_dev;
439		in_dev = inetdev_by_index(t->net, t->mlink);
440		if (in_dev)
441			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
442	}
443	return 0;
444}
445#endif
446
447static const struct net_device_ops ipgre_netdev_ops = {
448	.ndo_init		= ipgre_tunnel_init,
449	.ndo_uninit		= ip_tunnel_uninit,
450#ifdef CONFIG_NET_IPGRE_BROADCAST
451	.ndo_open		= ipgre_open,
452	.ndo_stop		= ipgre_close,
453#endif
454	.ndo_start_xmit		= ipgre_xmit,
455	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
456	.ndo_change_mtu		= ip_tunnel_change_mtu,
457	.ndo_get_stats64	= ip_tunnel_get_stats64,
458};
459
/* Feature bits that __gre_tunnel_init() sets on every GRE device. */
#define GRE_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
464
465static void ipgre_tunnel_setup(struct net_device *dev)
466{
467	dev->netdev_ops		= &ipgre_netdev_ops;
468	dev->type		= ARPHRD_IPGRE;
469	ip_tunnel_setup(dev, ipgre_net_id);
470}
471
472static void __gre_tunnel_init(struct net_device *dev)
473{
474	struct ip_tunnel *tunnel;
475	int t_hlen;
476
477	tunnel = netdev_priv(dev);
478	tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
479	tunnel->parms.iph.protocol = IPPROTO_GRE;
480
481	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
482
483	t_hlen = tunnel->hlen + sizeof(struct iphdr);
484
485	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
486	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;
487
488	dev->features		|= GRE_FEATURES;
489	dev->hw_features	|= GRE_FEATURES;
490
491	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
492		/* TCP offload with GRE SEQ is not supported. */
493		dev->features    |= NETIF_F_GSO_SOFTWARE;
494		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
495		/* Can use a lockless transmit, unless we generate
496		 * output sequences
497		 */
498		dev->features |= NETIF_F_LLTX;
499	}
500}
501
502static int ipgre_tunnel_init(struct net_device *dev)
503{
504	struct ip_tunnel *tunnel = netdev_priv(dev);
505	struct iphdr *iph = &tunnel->parms.iph;
506
507	__gre_tunnel_init(dev);
508
509	memcpy(dev->dev_addr, &iph->saddr, 4);
510	memcpy(dev->broadcast, &iph->daddr, 4);
511
512	dev->flags		= IFF_NOARP;
513	netif_keep_dst(dev);
514	dev->addr_len		= 4;
515
516	if (iph->daddr) {
517#ifdef CONFIG_NET_IPGRE_BROADCAST
518		if (ipv4_is_multicast(iph->daddr)) {
519			if (!iph->saddr)
520				return -EINVAL;
521			dev->flags = IFF_BROADCAST;
522			dev->header_ops = &ipgre_header_ops;
523		}
524#endif
525	} else
526		dev->header_ops = &ipgre_header_ops;
527
528	return ip_tunnel_init(dev);
529}
530
531static struct gre_cisco_protocol ipgre_protocol = {
532	.handler        = ipgre_rcv,
533	.err_handler    = ipgre_err,
534	.priority       = 0,
535};
536
537static int __net_init ipgre_init_net(struct net *net)
538{
539	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
540}
541
542static void __net_exit ipgre_exit_net(struct net *net)
543{
544	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
545	ip_tunnel_delete_net(itn, &ipgre_link_ops);
546}
547
548static struct pernet_operations ipgre_net_ops = {
549	.init = ipgre_init_net,
550	.exit = ipgre_exit_net,
551	.id   = &ipgre_net_id,
552	.size = sizeof(struct ip_tunnel_net),
553};
554
555static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
556{
557	__be16 flags;
558
559	if (!data)
560		return 0;
561
562	flags = 0;
563	if (data[IFLA_GRE_IFLAGS])
564		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
565	if (data[IFLA_GRE_OFLAGS])
566		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
567	if (flags & (GRE_VERSION|GRE_ROUTING))
568		return -EINVAL;
569
570	return 0;
571}
572
573static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
574{
575	__be32 daddr;
576
577	if (tb[IFLA_ADDRESS]) {
578		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
579			return -EINVAL;
580		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
581			return -EADDRNOTAVAIL;
582	}
583
584	if (!data)
585		goto out;
586
587	if (data[IFLA_GRE_REMOTE]) {
588		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
589		if (!daddr)
590			return -EINVAL;
591	}
592
593out:
594	return ipgre_tunnel_validate(tb, data);
595}
596
597static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
598			       struct ip_tunnel_parm *parms)
599{
600	memset(parms, 0, sizeof(*parms));
601
602	parms->iph.protocol = IPPROTO_GRE;
603
604	if (!data)
605		return;
606
607	if (data[IFLA_GRE_LINK])
608		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
609
610	if (data[IFLA_GRE_IFLAGS])
611		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
612
613	if (data[IFLA_GRE_OFLAGS])
614		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
615
616	if (data[IFLA_GRE_IKEY])
617		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
618
619	if (data[IFLA_GRE_OKEY])
620		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
621
622	if (data[IFLA_GRE_LOCAL])
623		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
624
625	if (data[IFLA_GRE_REMOTE])
626		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
627
628	if (data[IFLA_GRE_TTL])
629		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
630
631	if (data[IFLA_GRE_TOS])
632		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
633
634	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
635		parms->iph.frag_off = htons(IP_DF);
636}
637
638/* This function returns true when ENCAP attributes are present in the nl msg */
639static bool ipgre_netlink_encap_parms(struct nlattr *data[],
640				      struct ip_tunnel_encap *ipencap)
641{
642	bool ret = false;
643
644	memset(ipencap, 0, sizeof(*ipencap));
645
646	if (!data)
647		return ret;
648
649	if (data[IFLA_GRE_ENCAP_TYPE]) {
650		ret = true;
651		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
652	}
653
654	if (data[IFLA_GRE_ENCAP_FLAGS]) {
655		ret = true;
656		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
657	}
658
659	if (data[IFLA_GRE_ENCAP_SPORT]) {
660		ret = true;
661		ipencap->sport = nla_get_u16(data[IFLA_GRE_ENCAP_SPORT]);
662	}
663
664	if (data[IFLA_GRE_ENCAP_DPORT]) {
665		ret = true;
666		ipencap->dport = nla_get_u16(data[IFLA_GRE_ENCAP_DPORT]);
667	}
668
669	return ret;
670}
671
/* ndo_init for "gretap": common GRE setup, then generic tunnel init. */
static int gre_tap_init(struct net_device *dev)
{
	int rc;

	__gre_tunnel_init(dev);
	rc = ip_tunnel_init(dev);

	return rc;
}
678
679static const struct net_device_ops gre_tap_netdev_ops = {
680	.ndo_init		= gre_tap_init,
681	.ndo_uninit		= ip_tunnel_uninit,
682	.ndo_start_xmit		= gre_tap_xmit,
683	.ndo_set_mac_address 	= eth_mac_addr,
684	.ndo_validate_addr	= eth_validate_addr,
685	.ndo_change_mtu		= ip_tunnel_change_mtu,
686	.ndo_get_stats64	= ip_tunnel_get_stats64,
687};
688
689static void ipgre_tap_setup(struct net_device *dev)
690{
691	ether_setup(dev);
692	dev->netdev_ops		= &gre_tap_netdev_ops;
693	dev->priv_flags 	|= IFF_LIVE_ADDR_CHANGE;
694	ip_tunnel_setup(dev, gre_tap_net_id);
695}
696
697static int ipgre_newlink(struct net *src_net, struct net_device *dev,
698			 struct nlattr *tb[], struct nlattr *data[])
699{
700	struct ip_tunnel_parm p;
701	struct ip_tunnel_encap ipencap;
702
703	if (ipgre_netlink_encap_parms(data, &ipencap)) {
704		struct ip_tunnel *t = netdev_priv(dev);
705		int err = ip_tunnel_encap_setup(t, &ipencap);
706
707		if (err < 0)
708			return err;
709	}
710
711	ipgre_netlink_parms(data, tb, &p);
712	return ip_tunnel_newlink(dev, tb, &p);
713}
714
715static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
716			    struct nlattr *data[])
717{
718	struct ip_tunnel_parm p;
719	struct ip_tunnel_encap ipencap;
720
721	if (ipgre_netlink_encap_parms(data, &ipencap)) {
722		struct ip_tunnel *t = netdev_priv(dev);
723		int err = ip_tunnel_encap_setup(t, &ipencap);
724
725		if (err < 0)
726			return err;
727	}
728
729	ipgre_netlink_parms(data, tb, &p);
730	return ip_tunnel_changelink(dev, tb, &p);
731}
732
733static size_t ipgre_get_size(const struct net_device *dev)
734{
735	return
736		/* IFLA_GRE_LINK */
737		nla_total_size(4) +
738		/* IFLA_GRE_IFLAGS */
739		nla_total_size(2) +
740		/* IFLA_GRE_OFLAGS */
741		nla_total_size(2) +
742		/* IFLA_GRE_IKEY */
743		nla_total_size(4) +
744		/* IFLA_GRE_OKEY */
745		nla_total_size(4) +
746		/* IFLA_GRE_LOCAL */
747		nla_total_size(4) +
748		/* IFLA_GRE_REMOTE */
749		nla_total_size(4) +
750		/* IFLA_GRE_TTL */
751		nla_total_size(1) +
752		/* IFLA_GRE_TOS */
753		nla_total_size(1) +
754		/* IFLA_GRE_PMTUDISC */
755		nla_total_size(1) +
756		/* IFLA_GRE_ENCAP_TYPE */
757		nla_total_size(2) +
758		/* IFLA_GRE_ENCAP_FLAGS */
759		nla_total_size(2) +
760		/* IFLA_GRE_ENCAP_SPORT */
761		nla_total_size(2) +
762		/* IFLA_GRE_ENCAP_DPORT */
763		nla_total_size(2) +
764		0;
765}
766
767static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
768{
769	struct ip_tunnel *t = netdev_priv(dev);
770	struct ip_tunnel_parm *p = &t->parms;
771
772	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
773	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
774	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
775	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
776	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
777	    nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
778	    nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
779	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
780	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
781	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
782		       !!(p->iph.frag_off & htons(IP_DF))))
783		goto nla_put_failure;
784
785	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
786			t->encap.type) ||
787	    nla_put_u16(skb, IFLA_GRE_ENCAP_SPORT,
788			t->encap.sport) ||
789	    nla_put_u16(skb, IFLA_GRE_ENCAP_DPORT,
790			t->encap.dport) ||
791	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
792			t->encap.dport))
793		goto nla_put_failure;
794
795	return 0;
796
797nla_put_failure:
798	return -EMSGSIZE;
799}
800
801static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
802	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
803	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
804	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
805	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
806	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
807	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
808	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
809	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
810	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
811	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
812	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
813	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
814	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
815	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
816};
817
818static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
819	.kind		= "gre",
820	.maxtype	= IFLA_GRE_MAX,
821	.policy		= ipgre_policy,
822	.priv_size	= sizeof(struct ip_tunnel),
823	.setup		= ipgre_tunnel_setup,
824	.validate	= ipgre_tunnel_validate,
825	.newlink	= ipgre_newlink,
826	.changelink	= ipgre_changelink,
827	.dellink	= ip_tunnel_dellink,
828	.get_size	= ipgre_get_size,
829	.fill_info	= ipgre_fill_info,
830};
831
832static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
833	.kind		= "gretap",
834	.maxtype	= IFLA_GRE_MAX,
835	.policy		= ipgre_policy,
836	.priv_size	= sizeof(struct ip_tunnel),
837	.setup		= ipgre_tap_setup,
838	.validate	= ipgre_tap_validate,
839	.newlink	= ipgre_newlink,
840	.changelink	= ipgre_changelink,
841	.dellink	= ip_tunnel_dellink,
842	.get_size	= ipgre_get_size,
843	.fill_info	= ipgre_fill_info,
844};
845
846static int __net_init ipgre_tap_init_net(struct net *net)
847{
848	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
849}
850
851static void __net_exit ipgre_tap_exit_net(struct net *net)
852{
853	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
854	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
855}
856
857static struct pernet_operations ipgre_tap_net_ops = {
858	.init = ipgre_tap_init_net,
859	.exit = ipgre_tap_exit_net,
860	.id   = &gre_tap_net_id,
861	.size = sizeof(struct ip_tunnel_net),
862};
863
864static int __init ipgre_init(void)
865{
866	int err;
867
868	pr_info("GRE over IPv4 tunneling driver\n");
869
870	err = register_pernet_device(&ipgre_net_ops);
871	if (err < 0)
872		return err;
873
874	err = register_pernet_device(&ipgre_tap_net_ops);
875	if (err < 0)
876		goto pnet_tap_faied;
877
878	err = gre_cisco_register(&ipgre_protocol);
879	if (err < 0) {
880		pr_info("%s: can't add protocol\n", __func__);
881		goto add_proto_failed;
882	}
883
884	err = rtnl_link_register(&ipgre_link_ops);
885	if (err < 0)
886		goto rtnl_link_failed;
887
888	err = rtnl_link_register(&ipgre_tap_ops);
889	if (err < 0)
890		goto tap_ops_failed;
891
892	return 0;
893
894tap_ops_failed:
895	rtnl_link_unregister(&ipgre_link_ops);
896rtnl_link_failed:
897	gre_cisco_unregister(&ipgre_protocol);
898add_proto_failed:
899	unregister_pernet_device(&ipgre_tap_net_ops);
900pnet_tap_faied:
901	unregister_pernet_device(&ipgre_net_ops);
902	return err;
903}
904
905static void __exit ipgre_fini(void)
906{
907	rtnl_link_unregister(&ipgre_tap_ops);
908	rtnl_link_unregister(&ipgre_link_ops);
909	gre_cisco_unregister(&ipgre_protocol);
910	unregister_pernet_device(&ipgre_tap_net_ops);
911	unregister_pernet_device(&ipgre_net_ops);
912}
913
914module_init(ipgre_init);
915module_exit(ipgre_fini);
916MODULE_LICENSE("GPL");
917MODULE_ALIAS_RTNL_LINK("gre");
918MODULE_ALIAS_RTNL_LINK("gretap");
919MODULE_ALIAS_NETDEV("gre0");
920MODULE_ALIAS_NETDEV("gretap0");
921