ip_output.c revision 3644f0cee77494190452de132e82245107939284
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		The Internet Protocol (IP) output module.
7 *
8 * Version:	$Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $
9 *
10 * Authors:	Ross Biro
11 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *		Donald Becker, <becker@super.org>
13 *		Alan Cox, <Alan.Cox@linux.org>
14 *		Richard Underwood
15 *		Stefan Becker, <stefanb@yello.ping.de>
16 *		Jorge Cwik, <jorge@laser.satlink.net>
17 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
18 *		Hirokazu Takahashi, <taka@valinux.co.jp>
19 *
20 *	See ip_input.c for original log
21 *
22 *	Fixes:
23 *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
24 *		Mike Kilburn	:	htons() missing in ip_build_xmit.
25 *		Bradford Johnson:	Fix faulty handling of some frames when
26 *					no route is found.
27 *		Alexander Demenshin:	Missing sk/skb free in ip_queue_xmit
28 *					(in case the packet is not accepted by
29 *					output firewall rules)
30 *		Mike McLagan	:	Routing by source
31 *		Alexey Kuznetsov:	use new route cache
32 *		Andi Kleen:		Fix broken PMTU recovery and remove
33 *					some redundant tests.
34 *	Vitaly E. Lavrov	:	Transparent proxy revived after a year-long coma.
35 *		Andi Kleen	: 	Replace ip_reply with ip_send_reply.
36 *		Andi Kleen	:	Split fast and slow ip_build_xmit path
37 *					for decreased register pressure on x86
38 *					and more readability.
39 *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
40 *					silently drop skb instead of failing with -EPERM.
41 *		Detlev Wengorz	:	Copy protocol for fragments.
42 *		Hirokazu Takahashi:	HW checksumming for outgoing UDP
43 *					datagrams.
44 *		Hirokazu Takahashi:	sendfile() on UDP works now.
45 */
46
47#include <asm/uaccess.h>
48#include <asm/system.h>
49#include <linux/module.h>
50#include <linux/types.h>
51#include <linux/kernel.h>
52#include <linux/sched.h>
53#include <linux/mm.h>
54#include <linux/string.h>
55#include <linux/errno.h>
56#include <linux/highmem.h>
57
58#include <linux/socket.h>
59#include <linux/sockios.h>
60#include <linux/in.h>
61#include <linux/inet.h>
62#include <linux/netdevice.h>
63#include <linux/etherdevice.h>
64#include <linux/proc_fs.h>
65#include <linux/stat.h>
66#include <linux/init.h>
67
68#include <net/snmp.h>
69#include <net/ip.h>
70#include <net/protocol.h>
71#include <net/route.h>
72#include <net/xfrm.h>
73#include <linux/skbuff.h>
74#include <net/sock.h>
75#include <net/arp.h>
76#include <net/icmp.h>
77#include <net/checksum.h>
78#include <net/inetpeer.h>
80#include <linux/igmp.h>
81#include <linux/netfilter_ipv4.h>
82#include <linux/netfilter_bridge.h>
83#include <linux/mroute.h>
84#include <linux/netlink.h>
85#include <linux/tcp.h>
86
87int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
88
89/* Generate a checksum for an outgoing IP datagram. */
90__inline__ void ip_send_check(struct iphdr *iph)
91{
92	iph->check = 0;
93	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
94}
95
96/* dev_loopback_xmit for use with netfilter. */
97static int ip_dev_loopback_xmit(struct sk_buff *newskb)
98{
99	newskb->mac.raw = newskb->data;
100	__skb_pull(newskb, newskb->nh.raw - newskb->data);
101	newskb->pkt_type = PACKET_LOOPBACK;
102	newskb->ip_summed = CHECKSUM_UNNECESSARY;
103	BUG_TRAP(newskb->dst);
104	netif_rx(newskb);
105	return 0;
106}
107
108static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
109{
110	int ttl = inet->uc_ttl;
111
112	if (ttl < 0)
113		ttl = dst_metric(dst, RTAX_HOPLIMIT);
114	return ttl;
115}
116
117/*
118 *		Add an IP header to a skbuff and send it out.
119 *
120 */
121int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
122			  __be32 saddr, __be32 daddr, struct ip_options *opt)
123{
124	struct inet_sock *inet = inet_sk(sk);
125	struct rtable *rt = (struct rtable *)skb->dst;
126	struct iphdr *iph;
127
128	/* Build the IP header. */
129	if (opt)
130		iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen);
131	else
132		iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
133
134	iph->version  = 4;
135	iph->ihl      = 5;
136	iph->tos      = inet->tos;
137	if (ip_dont_fragment(sk, &rt->u.dst))
138		iph->frag_off = htons(IP_DF);
139	else
140		iph->frag_off = 0;
141	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
142	iph->daddr    = rt->rt_dst;
143	iph->saddr    = rt->rt_src;
144	iph->protocol = sk->sk_protocol;
145	iph->tot_len  = htons(skb->len);
146	ip_select_ident(iph, &rt->u.dst, sk);
147	skb->nh.iph   = iph;
148
149	if (opt && opt->optlen) {
150		iph->ihl += opt->optlen>>2;
151		ip_options_build(skb, opt, daddr, rt, 0);
152	}
153	ip_send_check(iph);
154
155	skb->priority = sk->sk_priority;
156
157	/* Send it out. */
158	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
159		       dst_output);
160}
161
162EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
163
164static inline int ip_finish_output2(struct sk_buff *skb)
165{
166	struct dst_entry *dst = skb->dst;
167	struct net_device *dev = dst->dev;
168	int hh_len = LL_RESERVED_SPACE(dev);
169
170	/* Be paranoid, rather than too clever. */
171	if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
172		struct sk_buff *skb2;
173
174		skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
175		if (skb2 == NULL) {
176			kfree_skb(skb);
177			return -ENOMEM;
178		}
179		if (skb->sk)
180			skb_set_owner_w(skb2, skb->sk);
181		kfree_skb(skb);
182		skb = skb2;
183	}
184
185	if (dst->hh)
186		return neigh_hh_output(dst->hh, skb);
187	else if (dst->neighbour)
188		return dst->neighbour->output(skb);
189
190	if (net_ratelimit())
191		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
192	kfree_skb(skb);
193	return -EINVAL;
194}
195
196static inline int ip_finish_output(struct sk_buff *skb)
197{
198#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
199	/* Policy lookup after SNAT yielded a new policy */
200	if (skb->dst->xfrm != NULL) {
201		IPCB(skb)->flags |= IPSKB_REROUTED;
202		return dst_output(skb);
203	}
204#endif
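	/* Only packets that exceed the path MTU need software fragmentation
	 * here; GSO packets are left intact, since the GSO layer segments
	 * them into MTU-sized pieces further down the stack.
	 */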
205	if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
206		return ip_fragment(skb, ip_finish_output2);
207	else
208		return ip_finish_output2(skb);
209}
210
211int ip_mc_output(struct sk_buff *skb)
212{
213	struct sock *sk = skb->sk;
214	struct rtable *rt = (struct rtable*)skb->dst;
215	struct net_device *dev = rt->u.dst.dev;
216
217	/*
218	 *	If the indicated interface is up and running, send the packet.
219	 */
220	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
221
222	skb->dev = dev;
223	skb->protocol = htons(ETH_P_IP);
224
225	/*
226	 *	Multicasts are looped back for other local users
227	 */
228
229	if (rt->rt_flags&RTCF_MULTICAST) {
230		if ((!sk || inet_sk(sk)->mc_loop)
231#ifdef CONFIG_IP_MROUTE
232		/* Small optimization: do not loop back non-local frames that
233		   were returned after forwarding; they will be dropped by
234		   ip_mr_input in any case.
235		   Note that local frames are looped back so they can be
236		   delivered to local recipients.
237
238		   This check is currently duplicated in ip_mr_input.
239		 */
240		    && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
241#endif
242		) {
243			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
244			if (newskb)
245				NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
246					newskb->dev,
247					ip_dev_loopback_xmit);
248		}
249
250		/* Multicasts with ttl 0 must not go beyond the host */
251
252		if (skb->nh.iph->ttl == 0) {
253			kfree_skb(skb);
254			return 0;
255		}
256	}
257
258	if (rt->rt_flags&RTCF_BROADCAST) {
259		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
260		if (newskb)
261			NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
262				newskb->dev, ip_dev_loopback_xmit);
263	}
264
265	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
266			    ip_finish_output,
267			    !(IPCB(skb)->flags & IPSKB_REROUTED));
268}
269
270int ip_output(struct sk_buff *skb)
271{
272	struct net_device *dev = skb->dst->dev;
273
274	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
275
276	skb->dev = dev;
277	skb->protocol = htons(ETH_P_IP);
278
279	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
280		            ip_finish_output,
281			    !(IPCB(skb)->flags & IPSKB_REROUTED));
282}
283
284int ip_queue_xmit(struct sk_buff *skb, struct sock *sk, int ipfragok)
285{
286	struct inet_sock *inet = inet_sk(sk);
287	struct ip_options *opt = inet->opt;
288	struct rtable *rt;
289	struct iphdr *iph;
290
291	/* Skip all of this if the packet is already routed,
292	 * e.g. by something like SCTP.
293	 */
294	rt = (struct rtable *) skb->dst;
295	if (rt != NULL)
296		goto packet_routed;
297
298	/* Make sure we can route this packet. */
299	rt = (struct rtable *)__sk_dst_check(sk, 0);
300	if (rt == NULL) {
301		__be32 daddr;
302
303		/* Use correct destination address if we have options. */
304		daddr = inet->daddr;
305		if(opt && opt->srr)
306			daddr = opt->faddr;
307
308		{
309			struct flowi fl = { .oif = sk->sk_bound_dev_if,
310					    .nl_u = { .ip4_u =
311						      { .daddr = daddr,
312							.saddr = inet->saddr,
313							.tos = RT_CONN_FLAGS(sk) } },
314					    .proto = sk->sk_protocol,
315					    .uli_u = { .ports =
316						       { .sport = inet->sport,
317							 .dport = inet->dport } } };
318
319			/* If this fails, the transport layer's retransmit mechanism
320			 * will keep trying until a route appears or the connection
321			 * times out.
322			 */
323			security_sk_classify_flow(sk, &fl);
324			if (ip_route_output_flow(&rt, &fl, sk, 0))
325				goto no_route;
326		}
327		sk_setup_caps(sk, &rt->u.dst);
328	}
329	skb->dst = dst_clone(&rt->u.dst);
330
331packet_routed:
332	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
333		goto no_route;
334
335	/* OK, we know where to send it; allocate and build the IP header. */
336	iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
337	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
338	iph->tot_len = htons(skb->len);
339	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
340		iph->frag_off = htons(IP_DF);
341	else
342		iph->frag_off = 0;
343	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
344	iph->protocol = sk->sk_protocol;
345	iph->saddr    = rt->rt_src;
346	iph->daddr    = rt->rt_dst;
347	skb->nh.iph   = iph;
348	/* The transport layer sets skb->h.foo itself. */
349
350	if (opt && opt->optlen) {
351		iph->ihl += opt->optlen >> 2;
352		ip_options_build(skb, opt, inet->daddr, rt, 0);
353	}
354
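	/* A GSO super-packet is later split into gso_segs on-wire packets,
	 * each needing its own IP ID, so reserve (gso_segs - 1) extra
	 * identification values beyond the one assigned here.
	 */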
355	ip_select_ident_more(iph, &rt->u.dst, sk,
356			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);
357
358	/* Add an IP checksum. */
359	ip_send_check(iph);
360
361	skb->priority = sk->sk_priority;
362
363	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
364		       dst_output);
365
366no_route:
367	IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
368	kfree_skb(skb);
369	return -EHOSTUNREACH;
370}
371
372
373static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
374{
375	to->pkt_type = from->pkt_type;
376	to->priority = from->priority;
377	to->protocol = from->protocol;
378	dst_release(to->dst);
379	to->dst = dst_clone(from->dst);
380	to->dev = from->dev;
381	to->mark = from->mark;
382
383	/* Copy the flags to each fragment. */
384	IPCB(to)->flags = IPCB(from)->flags;
385
386#ifdef CONFIG_NET_SCHED
387	to->tc_index = from->tc_index;
388#endif
389#ifdef CONFIG_NETFILTER
390	/* Connection association is same as pre-frag packet */
391	nf_conntrack_put(to->nfct);
392	to->nfct = from->nfct;
393	nf_conntrack_get(to->nfct);
394	to->nfctinfo = from->nfctinfo;
395#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
396	to->ipvs_property = from->ipvs_property;
397#endif
398#ifdef CONFIG_BRIDGE_NETFILTER
399	nf_bridge_put(to->nf_bridge);
400	to->nf_bridge = from->nf_bridge;
401	nf_bridge_get(to->nf_bridge);
402#endif
403#endif
404	skb_copy_secmark(to, from);
405}
406
407/*
408 *	This IP datagram is too large to be sent in one piece.  Break it up into
409 *	smaller pieces (each of a size equal to the IP header plus a block of
410 *	the original data) that will still fit in a single device frame, and
411 *	queue each such frame for sending.
412 */
413
414int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
415{
416	struct iphdr *iph;
417	int raw = 0;
418	int ptr;
419	struct net_device *dev;
420	struct sk_buff *skb2;
421	unsigned int mtu, hlen, left, len, ll_rs, pad;
422	int offset;
423	__be16 not_last_frag;
424	struct rtable *rt = (struct rtable*)skb->dst;
425	int err = 0;
426
427	dev = rt->u.dst.dev;
428
429	/*
430	 *	Point into the IP datagram header.
431	 */
432
433	iph = skb->nh.iph;
434
435	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
436		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
437		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
438			  htonl(dst_mtu(&rt->u.dst)));
439		kfree_skb(skb);
440		return -EMSGSIZE;
441	}
442
443	/*
444	 *	Set up starting values.
445	 */
446
447	hlen = iph->ihl * 4;
448	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
449	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
450
451	/* When a frag_list is given, use it. First, check its validity:
452	 * some transformers could create a wrong frag_list or break an
453	 * existing one; that is not prohibited. In that case fall back to copying.
454	 *
455	 * LATER: this step can be merged into the actual generation of fragments;
456	 * we could switch to copying when we see the first bad fragment.
457	 */
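	/* Fast path: the transport layer already built the datagram as a head
	 * skb plus a frag_list of ready-made pieces. If their sizes and
	 * headroom check out, each piece can be turned into a fragment by
	 * stamping a copy of the IP header on it, with no data copying.
	 */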
458	if (skb_shinfo(skb)->frag_list) {
459		struct sk_buff *frag;
460		int first_len = skb_pagelen(skb);
461
462		if (first_len - hlen > mtu ||
463		    ((first_len - hlen) & 7) ||
464		    (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
465		    skb_cloned(skb))
466			goto slow_path;
467
468		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
469			/* Correct geometry. */
470			if (frag->len > mtu ||
471			    ((frag->len & 7) && frag->next) ||
472			    skb_headroom(frag) < hlen)
473			    goto slow_path;
474
475			/* Partially cloned skb? */
476			if (skb_shared(frag))
477				goto slow_path;
478
479			BUG_ON(frag->sk);
480			if (skb->sk) {
481				sock_hold(skb->sk);
482				frag->sk = skb->sk;
483				frag->destructor = sock_wfree;
484				skb->truesize -= frag->truesize;
485			}
486		}
487
488		/* Everything is OK. Generate! */
489
490		err = 0;
491		offset = 0;
492		frag = skb_shinfo(skb)->frag_list;
493		skb_shinfo(skb)->frag_list = NULL;
494		skb->data_len = first_len - skb_headlen(skb);
495		skb->len = first_len;
496		iph->tot_len = htons(first_len);
497		iph->frag_off = htons(IP_MF);
498		ip_send_check(iph);
499
500		for (;;) {
501			/* Prepare the header of the next frame
502			 * before the previous one goes down. */
503			if (frag) {
504				frag->ip_summed = CHECKSUM_NONE;
505				frag->h.raw = frag->data;
506				frag->nh.raw = __skb_push(frag, hlen);
507				memcpy(frag->nh.raw, iph, hlen);
508				iph = frag->nh.iph;
509				iph->tot_len = htons(frag->len);
510				ip_copy_metadata(frag, skb);
511				if (offset == 0)
512					ip_options_fragment(frag);
513				offset += skb->len - hlen;
514				iph->frag_off = htons(offset>>3);
515				if (frag->next != NULL)
516					iph->frag_off |= htons(IP_MF);
517				/* Ready, complete checksum */
518				ip_send_check(iph);
519			}
520
521			err = output(skb);
522
523			if (!err)
524				IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
525			if (err || !frag)
526				break;
527
528			skb = frag;
529			frag = skb->next;
530			skb->next = NULL;
531		}
532
533		if (err == 0) {
534			IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
535			return 0;
536		}
537
538		while (frag) {
539			skb = frag->next;
540			kfree_skb(frag);
541			frag = skb;
542		}
543		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
544		return err;
545	}
546
547slow_path:
548	left = skb->len - hlen;		/* Space per frame */
549	ptr = raw + hlen;		/* Where to start from */
550
551	/* For bridged IP traffic encapsulated inside e.g. a VLAN header,
552	 * we need to make room for the encapsulating header.
553	 */
554	pad = nf_bridge_pad(skb);
555	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
556	mtu -= pad;
557
558	/*
559	 *	Fragment the datagram.
560	 */
561
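	/* If we are refragmenting an existing fragment, continue from its
	 * original offset and remember whether MF was set, so the final
	 * piece we emit keeps MF when the original was not the last fragment.
	 */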
562	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
563	not_last_frag = iph->frag_off & htons(IP_MF);
564
565	/*
566	 *	Keep copying data until we run out.
567	 */
568
569	while(left > 0)	{
570		len = left;
571		/* IF: it doesn't fit, use 'mtu' - the data space left */
572		if (len > mtu)
573			len = mtu;
574		/* IF: we are not sending up to and including the packet end
575		   then align the next start on an eight-byte boundary */
576		if (len < left)	{
577			len &= ~7;
578		}
579		/*
580		 *	Allocate buffer.
581		 */
582
583		if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
584			NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
585			err = -ENOMEM;
586			goto fail;
587		}
588
589		/*
590		 *	Set up data on packet
591		 */
592
593		ip_copy_metadata(skb2, skb);
594		skb_reserve(skb2, ll_rs);
595		skb_put(skb2, len + hlen);
596		skb2->nh.raw = skb2->data;
597		skb2->h.raw = skb2->data + hlen;
598
599		/*
600		 *	Charge the memory for the fragment to any owner
601		 *	it might possess
602		 */
603
604		if (skb->sk)
605			skb_set_owner_w(skb2, skb->sk);
606
607		/*
608		 *	Copy the packet header into the new buffer.
609		 */
610
611		memcpy(skb2->nh.raw, skb->data, hlen);
612
613		/*
614		 *	Copy a block of the IP datagram.
615		 */
616		if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
617			BUG();
618		left -= len;
619
620		/*
621		 *	Fill in the new header fields.
622		 */
623		iph = skb2->nh.iph;
624		iph->frag_off = htons((offset >> 3));
625
626		/* ANK: dirty, but effective trick. Upgrade options only if
627		 * the segment to be fragmented was THE FIRST (otherwise,
628		 * options are already fixed) and do it ONCE
629		 * on the initial skb, so that all the following fragments
630		 * will inherit fixed options.
631		 */
632		if (offset == 0)
633			ip_options_fragment(skb);
634
635		/*
636		 *	Added AC : If we are fragmenting a fragment that's not the
637		 *		   last fragment then keep the MF bit set on each piece
638		 */
639		if (left > 0 || not_last_frag)
640			iph->frag_off |= htons(IP_MF);
641		ptr += len;
642		offset += len;
643
644		/*
645		 *	Put this fragment into the sending queue.
646		 */
647		iph->tot_len = htons(len + hlen);
648
649		ip_send_check(iph);
650
651		err = output(skb2);
652		if (err)
653			goto fail;
654
655		IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
656	}
657	kfree_skb(skb);
658	IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
659	return err;
660
661fail:
662	kfree_skb(skb);
663	IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
664	return err;
665}
666
667EXPORT_SYMBOL(ip_fragment);
668
669int
670ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
671{
672	struct iovec *iov = from;
673
674	if (skb->ip_summed == CHECKSUM_PARTIAL) {
675		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
676			return -EFAULT;
677	} else {
678		__wsum csum = 0;
679		if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
680			return -EFAULT;
681		skb->csum = csum_block_add(skb->csum, csum, odd);
682	}
683	return 0;
684}
685
686static inline __wsum
687csum_page(struct page *page, int offset, int copy)
688{
689	char *kaddr;
690	__wsum csum;
691	kaddr = kmap(page);
692	csum = csum_partial(kaddr + offset, copy, 0);
693	kunmap(page);
694	return csum;
695}
696
697static inline int ip_ufo_append_data(struct sock *sk,
698			int getfrag(void *from, char *to, int offset, int len,
699			       int odd, struct sk_buff *skb),
700			void *from, int length, int hh_len, int fragheaderlen,
701			int transhdrlen, int mtu,unsigned int flags)
702{
703	struct sk_buff *skb;
704	int err;
705
706	/* The network device supports UDP fragmentation offload, so
707	 * create one single skb containing the complete UDP
708	 * datagram.
709	 */
710	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
711		skb = sock_alloc_send_skb(sk,
712			hh_len + fragheaderlen + transhdrlen + 20,
713			(flags & MSG_DONTWAIT), &err);
714
715		if (skb == NULL)
716			return err;
717
718		/* reserve space for Hardware header */
719		skb_reserve(skb, hh_len);
720
721		/* create space for UDP/IP header */
722		skb_put(skb,fragheaderlen + transhdrlen);
723
724		/* initialize network header pointer */
725		skb->nh.raw = skb->data;
726
727		/* initialize protocol header pointer */
728		skb->h.raw = skb->data + fragheaderlen;
729
730		skb->ip_summed = CHECKSUM_PARTIAL;
731		skb->csum = 0;
732		sk->sk_sndmsg_off = 0;
733	}
734
735	err = skb_append_datato_frags(sk,skb, getfrag, from,
736			       (length - transhdrlen));
737	if (!err) {
738		/* specify the length of each IP datagram fragment */
739		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
740		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
741		__skb_queue_tail(&sk->sk_write_queue, skb);
742
743		return 0;
744	}
745	/* There is not enough support to do UFO,
746	 * so follow the normal path.
747	 */
748	kfree_skb(skb);
749	return err;
750}
751
752/*
753 *	ip_append_data() and ip_append_page() can make one large IP datagram
754 *	from many pieces of data. Each piece will be held on the socket
755 *	until ip_push_pending_frames() is called. Each piece can be a page
756 *	or non-page data.
757 *
758 *	Not only UDP; other transport protocols - e.g. raw sockets - can
759 *	potentially use this interface.
760 *
761 *	LATER: length must be adjusted by the pad at the tail, when required.
762 */
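/*
 *	Rough usage sketch (illustrative only): a datagram protocol typically
 *	calls ip_append_data() one or more times to queue data on the socket,
 *	then ip_push_pending_frames() to build the IP header and transmit, or
 *	ip_flush_pending_frames() to discard the queue on error:
 *
 *		err = ip_append_data(sk, getfrag, from, len, transhdrlen,
 *				     &ipc, rt, flags);
 *		if (err)
 *			ip_flush_pending_frames(sk);
 *		else if (!(flags & MSG_MORE))
 *			err = ip_push_pending_frames(sk);
 */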
763int ip_append_data(struct sock *sk,
764		   int getfrag(void *from, char *to, int offset, int len,
765			       int odd, struct sk_buff *skb),
766		   void *from, int length, int transhdrlen,
767		   struct ipcm_cookie *ipc, struct rtable *rt,
768		   unsigned int flags)
769{
770	struct inet_sock *inet = inet_sk(sk);
771	struct sk_buff *skb;
772
773	struct ip_options *opt = NULL;
774	int hh_len;
775	int exthdrlen;
776	int mtu;
777	int copy;
778	int err;
779	int offset = 0;
780	unsigned int maxfraglen, fragheaderlen;
781	int csummode = CHECKSUM_NONE;
782
783	if (flags&MSG_PROBE)
784		return 0;
785
786	if (skb_queue_empty(&sk->sk_write_queue)) {
787		/*
788		 * setup for corking.
789		 */
790		opt = ipc->opt;
791		if (opt) {
792			if (inet->cork.opt == NULL) {
793				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
794				if (unlikely(inet->cork.opt == NULL))
795					return -ENOBUFS;
796			}
797			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
798			inet->cork.flags |= IPCORK_OPT;
799			inet->cork.addr = ipc->addr;
800		}
801		dst_hold(&rt->u.dst);
802		inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
803		inet->cork.rt = rt;
804		inet->cork.length = 0;
805		sk->sk_sndmsg_page = NULL;
806		sk->sk_sndmsg_off = 0;
807		if ((exthdrlen = rt->u.dst.header_len) != 0) {
808			length += exthdrlen;
809			transhdrlen += exthdrlen;
810		}
811	} else {
812		rt = inet->cork.rt;
813		if (inet->cork.flags & IPCORK_OPT)
814			opt = inet->cork.opt;
815
816		transhdrlen = 0;
817		exthdrlen = 0;
818		mtu = inet->cork.fragsize;
819	}
820	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
821
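	/* fragheaderlen is the length of the IP header (plus options) carried
	 * by every fragment; maxfraglen rounds the data part down to a
	 * multiple of 8 bytes, since fragment offsets are expressed in
	 * 8-byte units.
	 */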
822	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
823	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
824
825	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
826		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
827		return -EMSGSIZE;
828	}
829
830	/*
831	 * transhdrlen > 0 means that this is the first fragment and we wish
832	 * it not to be fragmented in the future.
833	 */
834	if (transhdrlen &&
835	    length + fragheaderlen <= mtu &&
836	    rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
837	    !exthdrlen)
838		csummode = CHECKSUM_PARTIAL;
839
840	inet->cork.length += length;
841	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
842			(rt->u.dst.dev->features & NETIF_F_UFO)) {
843
844		err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
845					 fragheaderlen, transhdrlen, mtu,
846					 flags);
847		if (err)
848			goto error;
849		return 0;
850	}
851
852	/* So, what's going on in the loop below?
853	 *
854	 * We use the calculated fragment length to generate a chain of skbs;
855	 * each segment is an IP fragment, ready to be sent to the network
856	 * once an appropriate IP header has been added.
857	 */
858
859	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
860		goto alloc_new_skb;
861
862	while (length > 0) {
863		/* Check if the remaining data fits into current packet. */
864		copy = mtu - skb->len;
865		if (copy < length)
866			copy = maxfraglen - skb->len;
867		if (copy <= 0) {
868			char *data;
869			unsigned int datalen;
870			unsigned int fraglen;
871			unsigned int fraggap;
872			unsigned int alloclen;
873			struct sk_buff *skb_prev;
874alloc_new_skb:
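			/* fraggap is the tail of the previous skb that ran past
			 * maxfraglen (allowed only while it could still be the
			 * last fragment); it is moved into the new skb below so
			 * every non-final fragment keeps an 8-byte aligned length.
			 */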
875			skb_prev = skb;
876			if (skb_prev)
877				fraggap = skb_prev->len - maxfraglen;
878			else
879				fraggap = 0;
880
881			/*
882			 * If remaining data exceeds the mtu,
883			 * we know we need more fragment(s).
884			 */
885			datalen = length + fraggap;
886			if (datalen > mtu - fragheaderlen)
887				datalen = maxfraglen - fragheaderlen;
888			fraglen = datalen + fragheaderlen;
889
890			if ((flags & MSG_MORE) &&
891			    !(rt->u.dst.dev->features&NETIF_F_SG))
892				alloclen = mtu;
893			else
894				alloclen = datalen + fragheaderlen;
895
896			/* The last fragment gets additional space at tail.
897			 * Note that with MSG_MORE we overallocate on fragments,
898			 * because we have no idea which fragment will be
899			 * the last.
900			 */
901			if (datalen == length + fraggap)
902				alloclen += rt->u.dst.trailer_len;
903
904			if (transhdrlen) {
905				skb = sock_alloc_send_skb(sk,
906						alloclen + hh_len + 15,
907						(flags & MSG_DONTWAIT), &err);
908			} else {
909				skb = NULL;
910				if (atomic_read(&sk->sk_wmem_alloc) <=
911				    2 * sk->sk_sndbuf)
912					skb = sock_wmalloc(sk,
913							   alloclen + hh_len + 15, 1,
914							   sk->sk_allocation);
915				if (unlikely(skb == NULL))
916					err = -ENOBUFS;
917			}
918			if (skb == NULL)
919				goto error;
920
921			/*
922			 *	Fill in the control structures
923			 */
924			skb->ip_summed = csummode;
925			skb->csum = 0;
926			skb_reserve(skb, hh_len);
927
928			/*
929			 *	Find where to start putting bytes.
930			 */
931			data = skb_put(skb, fraglen);
932			skb->nh.raw = data + exthdrlen;
933			data += fragheaderlen;
934			skb->h.raw = data + exthdrlen;
935
936			if (fraggap) {
937				skb->csum = skb_copy_and_csum_bits(
938					skb_prev, maxfraglen,
939					data + transhdrlen, fraggap, 0);
940				skb_prev->csum = csum_sub(skb_prev->csum,
941							  skb->csum);
942				data += fraggap;
943				pskb_trim_unique(skb_prev, maxfraglen);
944			}
945
946			copy = datalen - transhdrlen - fraggap;
947			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
948				err = -EFAULT;
949				kfree_skb(skb);
950				goto error;
951			}
952
953			offset += copy;
954			length -= datalen - fraggap;
955			transhdrlen = 0;
956			exthdrlen = 0;
957			csummode = CHECKSUM_NONE;
958
959			/*
960			 * Put the packet on the pending queue.
961			 */
962			__skb_queue_tail(&sk->sk_write_queue, skb);
963			continue;
964		}
965
966		if (copy > length)
967			copy = length;
968
969		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
970			unsigned int off;
971
972			off = skb->len;
973			if (getfrag(from, skb_put(skb, copy),
974					offset, copy, off, skb) < 0) {
975				__skb_trim(skb, off);
976				err = -EFAULT;
977				goto error;
978			}
979		} else {
980			int i = skb_shinfo(skb)->nr_frags;
981			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
982			struct page *page = sk->sk_sndmsg_page;
983			int off = sk->sk_sndmsg_off;
984			unsigned int left;
985
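			/* The socket caches a partially filled page in
			 * sk_sndmsg_page/sk_sndmsg_off, so successive small
			 * appends can keep filling the same page fragment
			 * before a new page is allocated.
			 */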
986			if (page && (left = PAGE_SIZE - off) > 0) {
987				if (copy >= left)
988					copy = left;
989				if (page != frag->page) {
990					if (i == MAX_SKB_FRAGS) {
991						err = -EMSGSIZE;
992						goto error;
993					}
994					get_page(page);
995					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
996					frag = &skb_shinfo(skb)->frags[i];
997				}
998			} else if (i < MAX_SKB_FRAGS) {
999				if (copy > PAGE_SIZE)
1000					copy = PAGE_SIZE;
1001				page = alloc_pages(sk->sk_allocation, 0);
1002				if (page == NULL)  {
1003					err = -ENOMEM;
1004					goto error;
1005				}
1006				sk->sk_sndmsg_page = page;
1007				sk->sk_sndmsg_off = 0;
1008
1009				skb_fill_page_desc(skb, i, page, 0, 0);
1010				frag = &skb_shinfo(skb)->frags[i];
1011				skb->truesize += PAGE_SIZE;
1012				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1013			} else {
1014				err = -EMSGSIZE;
1015				goto error;
1016			}
1017			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1018				err = -EFAULT;
1019				goto error;
1020			}
1021			sk->sk_sndmsg_off += copy;
1022			frag->size += copy;
1023			skb->len += copy;
1024			skb->data_len += copy;
1025		}
1026		offset += copy;
1027		length -= copy;
1028	}
1029
1030	return 0;
1031
1032error:
1033	inet->cork.length -= length;
1034	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1035	return err;
1036}
1037
1038ssize_t	ip_append_page(struct sock *sk, struct page *page,
1039		       int offset, size_t size, int flags)
1040{
1041	struct inet_sock *inet = inet_sk(sk);
1042	struct sk_buff *skb;
1043	struct rtable *rt;
1044	struct ip_options *opt = NULL;
1045	int hh_len;
1046	int mtu;
1047	int len;
1048	int err;
1049	unsigned int maxfraglen, fragheaderlen, fraggap;
1050
1051	if (inet->hdrincl)
1052		return -EPERM;
1053
1054	if (flags&MSG_PROBE)
1055		return 0;
1056
1057	if (skb_queue_empty(&sk->sk_write_queue))
1058		return -EINVAL;
1059
1060	rt = inet->cork.rt;
1061	if (inet->cork.flags & IPCORK_OPT)
1062		opt = inet->cork.opt;
1063
1064	if (!(rt->u.dst.dev->features&NETIF_F_SG))
1065		return -EOPNOTSUPP;
1066
1067	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1068	mtu = inet->cork.fragsize;
1069
1070	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
1071	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
1072
1073	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
1074		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
1075		return -EMSGSIZE;
1076	}
1077
1078	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1079		return -EINVAL;
1080
1081	inet->cork.length += size;
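	/* With UFO-capable hardware the datagram is kept as one large skb;
	 * gso_size tells the device (or the software GSO fallback) where to
	 * cut the on-wire fragments.
	 */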
1082	if ((sk->sk_protocol == IPPROTO_UDP) &&
1083	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
1084		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
1085		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1086	}
1087
1088
1089	while (size > 0) {
1090		int i;
1091
1092		if (skb_is_gso(skb))
1093			len = size;
1094		else {
1095
1096			/* Check if the remaining data fits into current packet. */
1097			len = mtu - skb->len;
1098			if (len < size)
1099				len = maxfraglen - skb->len;
1100		}
1101		if (len <= 0) {
1102			struct sk_buff *skb_prev;
1103			char *data;
1104			struct iphdr *iph;
1105			int alloclen;
1106
1107			skb_prev = skb;
1108			fraggap = skb_prev->len - maxfraglen;
1109
1110			alloclen = fragheaderlen + hh_len + fraggap + 15;
1111			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
1112			if (unlikely(!skb)) {
1113				err = -ENOBUFS;
1114				goto error;
1115			}
1116
1117			/*
1118			 *	Fill in the control structures
1119			 */
1120			skb->ip_summed = CHECKSUM_NONE;
1121			skb->csum = 0;
1122			skb_reserve(skb, hh_len);
1123
1124			/*
1125			 *	Find where to start putting bytes.
1126			 */
1127			data = skb_put(skb, fragheaderlen + fraggap);
1128			skb->nh.iph = iph = (struct iphdr *)data;
1129			data += fragheaderlen;
1130			skb->h.raw = data;
1131
1132			if (fraggap) {
1133				skb->csum = skb_copy_and_csum_bits(
1134					skb_prev, maxfraglen,
1135					data, fraggap, 0);
1136				skb_prev->csum = csum_sub(skb_prev->csum,
1137							  skb->csum);
1138				pskb_trim_unique(skb_prev, maxfraglen);
1139			}
1140
1141			/*
1142			 * Put the packet on the pending queue.
1143			 */
1144			__skb_queue_tail(&sk->sk_write_queue, skb);
1145			continue;
1146		}
1147
1148		i = skb_shinfo(skb)->nr_frags;
1149		if (len > size)
1150			len = size;
1151		if (skb_can_coalesce(skb, i, page, offset)) {
1152			skb_shinfo(skb)->frags[i-1].size += len;
1153		} else if (i < MAX_SKB_FRAGS) {
1154			get_page(page);
1155			skb_fill_page_desc(skb, i, page, offset, len);
1156		} else {
1157			err = -EMSGSIZE;
1158			goto error;
1159		}
1160
1161		if (skb->ip_summed == CHECKSUM_NONE) {
1162			__wsum csum;
1163			csum = csum_page(page, offset, len);
1164			skb->csum = csum_block_add(skb->csum, csum, skb->len);
1165		}
1166
1167		skb->len += len;
1168		skb->data_len += len;
1169		offset += len;
1170		size -= len;
1171	}
1172	return 0;
1173
1174error:
1175	inet->cork.length -= size;
1176	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1177	return err;
1178}
1179
1180/*
1181 *	Combine all pending IP fragments on the socket into one IP datagram
1182 *	and push them out.
1183 */
1184int ip_push_pending_frames(struct sock *sk)
1185{
1186	struct sk_buff *skb, *tmp_skb;
1187	struct sk_buff **tail_skb;
1188	struct inet_sock *inet = inet_sk(sk);
1189	struct ip_options *opt = NULL;
1190	struct rtable *rt = inet->cork.rt;
1191	struct iphdr *iph;
1192	__be16 df = 0;
1193	__u8 ttl;
1194	int err = 0;
1195
1196	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1197		goto out;
1198	tail_skb = &(skb_shinfo(skb)->frag_list);
1199
1200	/* move skb->data to ip header from ext header */
1201	if (skb->data < skb->nh.raw)
1202		__skb_pull(skb, skb->nh.raw - skb->data);
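	/* The remaining queued skbs are spliced onto the first skb's
	 * frag_list, so the datagram travels down the stack as a single skb;
	 * ip_fragment() can later split it back along these boundaries
	 * without copying.
	 */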
1203	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1204		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1205		*tail_skb = tmp_skb;
1206		tail_skb = &(tmp_skb->next);
1207		skb->len += tmp_skb->len;
1208		skb->data_len += tmp_skb->len;
1209		skb->truesize += tmp_skb->truesize;
1210		__sock_put(tmp_skb->sk);
1211		tmp_skb->destructor = NULL;
1212		tmp_skb->sk = NULL;
1213	}
1214
1215	/* Unless the user demanded real PMTU discovery (IP_PMTUDISC_DO), we allow
1216	 * the frame generated here to be fragmented. No matter how transforms
1217	 * change the size of the packet, it will go out.
1218	 */
1219	if (inet->pmtudisc != IP_PMTUDISC_DO)
1220		skb->local_df = 1;
1221
1222	/* DF bit is set when we want to see DF on outgoing frames.
1223	 * If local_df is also set, we still allow this frame to be fragmented
1224	 * locally. */
1225	if (inet->pmtudisc == IP_PMTUDISC_DO ||
1226	    (skb->len <= dst_mtu(&rt->u.dst) &&
1227	     ip_dont_fragment(sk, &rt->u.dst)))
1228		df = htons(IP_DF);
1229
1230	if (inet->cork.flags & IPCORK_OPT)
1231		opt = inet->cork.opt;
1232
1233	if (rt->rt_type == RTN_MULTICAST)
1234		ttl = inet->mc_ttl;
1235	else
1236		ttl = ip_select_ttl(inet, &rt->u.dst);
1237
1238	iph = (struct iphdr *)skb->data;
1239	iph->version = 4;
1240	iph->ihl = 5;
1241	if (opt) {
1242		iph->ihl += opt->optlen>>2;
1243		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
1244	}
1245	iph->tos = inet->tos;
1246	iph->tot_len = htons(skb->len);
1247	iph->frag_off = df;
1248	ip_select_ident(iph, &rt->u.dst, sk);
1249	iph->ttl = ttl;
1250	iph->protocol = sk->sk_protocol;
1251	iph->saddr = rt->rt_src;
1252	iph->daddr = rt->rt_dst;
1253	ip_send_check(iph);
1254
1255	skb->priority = sk->sk_priority;
1256	skb->dst = dst_clone(&rt->u.dst);
1257
1258	/* Netfilter gets the whole, not yet fragmented skb. */
1259	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
1260		      skb->dst->dev, dst_output);
1261	if (err) {
1262		if (err > 0)
1263			err = inet->recverr ? net_xmit_errno(err) : 0;
1264		if (err)
1265			goto error;
1266	}
1267
1268out:
1269	inet->cork.flags &= ~IPCORK_OPT;
1270	kfree(inet->cork.opt);
1271	inet->cork.opt = NULL;
1272	if (inet->cork.rt) {
1273		ip_rt_put(inet->cork.rt);
1274		inet->cork.rt = NULL;
1275	}
1276	return err;
1277
1278error:
1279	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1280	goto out;
1281}
1282
1283/*
1284 *	Throw away all pending data on the socket.
1285 */
1286void ip_flush_pending_frames(struct sock *sk)
1287{
1288	struct inet_sock *inet = inet_sk(sk);
1289	struct sk_buff *skb;
1290
1291	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
1292		kfree_skb(skb);
1293
1294	inet->cork.flags &= ~IPCORK_OPT;
1295	kfree(inet->cork.opt);
1296	inet->cork.opt = NULL;
1297	if (inet->cork.rt) {
1298		ip_rt_put(inet->cork.rt);
1299		inet->cork.rt = NULL;
1300	}
1301}
1302
1303
1304/*
1305 *	Fetch data from kernel space and fill in checksum if needed.
1306 */
1307static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1308			      int len, int odd, struct sk_buff *skb)
1309{
1310	__wsum csum;
1311
1312	csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
1313	skb->csum = csum_block_add(skb->csum, csum, odd);
1314	return 0;
1315}
1316
1317/*
1318 *	Generic function to send a packet as a reply to another packet.
1319 *	Used to send TCP resets so far. ICMP should use this function too.
1320 *
1321 *	Should run single-threaded per socket because it uses the sock
1322 *	structure to pass arguments.
1323 *
1324 *	LATER: switch from ip_build_xmit to ip_append_*
1325 */
1326void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
1327		   unsigned int len)
1328{
1329	struct inet_sock *inet = inet_sk(sk);
1330	struct {
1331		struct ip_options	opt;
1332		char			data[40];
1333	} replyopts;
1334	struct ipcm_cookie ipc;
1335	__be32 daddr;
1336	struct rtable *rt = (struct rtable*)skb->dst;
1337
1338	if (ip_options_echo(&replyopts.opt, skb))
1339		return;
1340
1341	daddr = ipc.addr = rt->rt_src;
1342	ipc.opt = NULL;
1343
1344	if (replyopts.opt.optlen) {
1345		ipc.opt = &replyopts.opt;
1346
1347		if (ipc.opt->srr)
1348			daddr = replyopts.opt.faddr;
1349	}
1350
1351	{
1352		struct flowi fl = { .nl_u = { .ip4_u =
1353					      { .daddr = daddr,
1354						.saddr = rt->rt_spec_dst,
1355						.tos = RT_TOS(skb->nh.iph->tos) } },
1356				    /* Not quite clean, but right. */
1357				    .uli_u = { .ports =
1358					       { .sport = skb->h.th->dest,
1359					         .dport = skb->h.th->source } },
1360				    .proto = sk->sk_protocol };
1361		security_skb_classify_flow(skb, &fl);
1362		if (ip_route_output_key(&rt, &fl))
1363			return;
1364	}
1365
1366	/* And let IP do all the hard work.
1367
1368	   This chunk is not reentrant, hence the spinlock.
1369	   Note that it relies on the fact that this function is called
1370	   with BH disabled locally and that sk cannot already be locked.
1371	 */
1372	bh_lock_sock(sk);
1373	inet->tos = skb->nh.iph->tos;
1374	sk->sk_priority = skb->priority;
1375	sk->sk_protocol = skb->nh.iph->protocol;
1376	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1377		       &ipc, rt, MSG_DONTWAIT);
1378	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1379		if (arg->csumoffset >= 0)
1380			*((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
1381		skb->ip_summed = CHECKSUM_NONE;
1382		ip_push_pending_frames(sk);
1383	}
1384
1385	bh_unlock_sock(sk);
1386
1387	ip_rt_put(rt);
1388}
1389
1390void __init ip_init(void)
1391{
1392	ip_rt_init();
1393	inet_initpeers();
1394
1395#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
1396	igmp_mc_proc_init();
1397#endif
1398}
1399
1400EXPORT_SYMBOL(ip_generic_getfrag);
1401EXPORT_SYMBOL(ip_queue_xmit);
1402EXPORT_SYMBOL(ip_send_check);
1403