ip_output.c revision cfacb0577e319b02ed42685a0a8e0f1657ac461b
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		The Internet Protocol (IP) output module.
7 *
8 * Version:	$Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $
9 *
10 * Authors:	Ross Biro
11 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *		Donald Becker, <becker@super.org>
13 *		Alan Cox, <Alan.Cox@linux.org>
14 *		Richard Underwood
15 *		Stefan Becker, <stefanb@yello.ping.de>
16 *		Jorge Cwik, <jorge@laser.satlink.net>
17 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
18 *		Hirokazu Takahashi, <taka@valinux.co.jp>
19 *
20 *	See ip_input.c for original log
21 *
22 *	Fixes:
23 *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
24 *		Mike Kilburn	:	htons() missing in ip_build_xmit.
25 *		Bradford Johnson:	Fix faulty handling of some frames when
26 *					no route is found.
27 *		Alexander Demenshin:	Missing sk/skb free in ip_queue_xmit
28 *					(in case a packet is not accepted by
29 *					output firewall rules)
30 *		Mike McLagan	:	Routing by source
31 *		Alexey Kuznetsov:	use new route cache
32 *		Andi Kleen:		Fix broken PMTU recovery and remove
33 *					some redundant tests.
34 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
35 *		Andi Kleen	: 	Replace ip_reply with ip_send_reply.
36 *		Andi Kleen	:	Split fast and slow ip_build_xmit path
37 *					for decreased register pressure on x86
38 *					and more readability.
39 *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
40 *					silently drop skb instead of failing with -EPERM.
41 *		Detlev Wengorz	:	Copy protocol for fragments.
42 *		Hirokazu Takahashi:	HW checksumming for outgoing UDP
43 *					datagrams.
44 *		Hirokazu Takahashi:	sendfile() on UDP works now.
45 */
46
47#include <asm/uaccess.h>
48#include <asm/system.h>
49#include <linux/module.h>
50#include <linux/types.h>
51#include <linux/kernel.h>
52#include <linux/sched.h>
53#include <linux/mm.h>
54#include <linux/string.h>
55#include <linux/errno.h>
56#include <linux/config.h>
57
58#include <linux/socket.h>
59#include <linux/sockios.h>
60#include <linux/in.h>
61#include <linux/inet.h>
62#include <linux/netdevice.h>
63#include <linux/etherdevice.h>
64#include <linux/proc_fs.h>
65#include <linux/stat.h>
66#include <linux/init.h>
67
68#include <net/snmp.h>
69#include <net/ip.h>
70#include <net/protocol.h>
71#include <net/route.h>
72#include <net/xfrm.h>
73#include <linux/skbuff.h>
74#include <net/sock.h>
75#include <net/arp.h>
76#include <net/icmp.h>
77#include <net/checksum.h>
78#include <net/inetpeer.h>
80#include <linux/igmp.h>
81#include <linux/netfilter_ipv4.h>
82#include <linux/netfilter_bridge.h>
83#include <linux/mroute.h>
84#include <linux/netlink.h>
85#include <linux/tcp.h>
86
87int sysctl_ip_default_ttl = IPDEFTTL;
88
89static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*));
90
91/* Generate a checksum for an outgoing IP datagram. */
92__inline__ void ip_send_check(struct iphdr *iph)
93{
94	iph->check = 0;
95	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
96}
97
98/* dev_loopback_xmit for use with netfilter. */
99static int ip_dev_loopback_xmit(struct sk_buff *newskb)
100{
101	newskb->mac.raw = newskb->data;
102	__skb_pull(newskb, newskb->nh.raw - newskb->data);
103	newskb->pkt_type = PACKET_LOOPBACK;
104	newskb->ip_summed = CHECKSUM_UNNECESSARY;
105	BUG_TRAP(newskb->dst);
106	netif_rx(newskb);
107	return 0;
108}
109
110static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
111{
112	int ttl = inet->uc_ttl;
113
114	if (ttl < 0)
115		ttl = dst_metric(dst, RTAX_HOPLIMIT);
116	return ttl;
117}
118
119/*
120 *		Add an ip header to a skbuff and send it out.
121 *
122 */
123int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
124			  u32 saddr, u32 daddr, struct ip_options *opt)
125{
126	struct inet_sock *inet = inet_sk(sk);
127	struct rtable *rt = (struct rtable *)skb->dst;
128	struct iphdr *iph;
129
130	/* Build the IP header. */
131	if (opt)
132		iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen);
133	else
134		iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
135
136	iph->version  = 4;
137	iph->ihl      = 5;
138	iph->tos      = inet->tos;
139	if (ip_dont_fragment(sk, &rt->u.dst))
140		iph->frag_off = htons(IP_DF);
141	else
142		iph->frag_off = 0;
143	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
144	iph->daddr    = rt->rt_dst;
145	iph->saddr    = rt->rt_src;
146	iph->protocol = sk->sk_protocol;
147	iph->tot_len  = htons(skb->len);
148	ip_select_ident(iph, &rt->u.dst, sk);
149	skb->nh.iph   = iph;
150
151	if (opt && opt->optlen) {
152		iph->ihl += opt->optlen>>2;
153		ip_options_build(skb, opt, daddr, rt, 0);
154	}
155	ip_send_check(iph);
156
157	skb->priority = sk->sk_priority;
158
159	/* Send it out. */
160	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
161		       dst_output);
162}
163
164EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
165
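/*
 * Finish transmission of a single packet: make sure the skb has enough
 * headroom for the link-layer header, then hand it to the cached hard
 * header output routine if the destination has one, or to the neighbour
 * output function otherwise.
 */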
166static inline int ip_finish_output2(struct sk_buff *skb)
167{
168	struct dst_entry *dst = skb->dst;
169	struct hh_cache *hh = dst->hh;
170	struct net_device *dev = dst->dev;
171	int hh_len = LL_RESERVED_SPACE(dev);
172
173	/* Be paranoid, rather than too clever. */
174	if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
175		struct sk_buff *skb2;
176
177		skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
178		if (skb2 == NULL) {
179			kfree_skb(skb);
180			return -ENOMEM;
181		}
182		if (skb->sk)
183			skb_set_owner_w(skb2, skb->sk);
184		kfree_skb(skb);
185		skb = skb2;
186	}
187
188	if (hh) {
189		int hh_alen;
190
191		read_lock_bh(&hh->hh_lock);
192		hh_alen = HH_DATA_ALIGN(hh->hh_len);
193		memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
194		read_unlock_bh(&hh->hh_lock);
195		skb_push(skb, hh->hh_len);
196		return hh->hh_output(skb);
197	} else if (dst->neighbour)
198		return dst->neighbour->output(skb);
199
200	if (net_ratelimit())
201		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
202	kfree_skb(skb);
203	return -EINVAL;
204}
205
206static inline int ip_finish_output(struct sk_buff *skb)
207{
208#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
209	/* Policy lookup after SNAT yielded a new policy */
210	if (skb->dst->xfrm != NULL)
211		return xfrm4_output_finish(skb);
212#endif
213	if (skb->len > dst_mtu(skb->dst) &&
214	    !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
215		return ip_fragment(skb, ip_finish_output2);
216	else
217		return ip_finish_output2(skb);
218}
219
220int ip_mc_output(struct sk_buff *skb)
221{
222	struct sock *sk = skb->sk;
223	struct rtable *rt = (struct rtable*)skb->dst;
224	struct net_device *dev = rt->u.dst.dev;
225
226	/*
227	 *	If the indicated interface is up and running, send the packet.
228	 */
229	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
230
231	skb->dev = dev;
232	skb->protocol = htons(ETH_P_IP);
233
234	/*
235	 *	Multicasts are looped back for other local users
236	 */
237
238	if (rt->rt_flags&RTCF_MULTICAST) {
239		if ((!sk || inet_sk(sk)->mc_loop)
240#ifdef CONFIG_IP_MROUTE
241		/* Small optimization: do not loop back non-local frames
242		   that have returned after forwarding; they will be dropped
243		   by ip_mr_input in any case.
244		   Note that local frames are looped back so that they are
245		   delivered to local recipients.
246
247		   This check is duplicated in ip_mr_input at the moment.
248		 */
249		    && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
250#endif
251		) {
252			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
253			if (newskb)
254				NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
255					newskb->dev,
256					ip_dev_loopback_xmit);
257		}
258
259		/* Multicasts with ttl 0 must not go beyond the host */
260
261		if (skb->nh.iph->ttl == 0) {
262			kfree_skb(skb);
263			return 0;
264		}
265	}
266
267	if (rt->rt_flags&RTCF_BROADCAST) {
268		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
269		if (newskb)
270			NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
271				newskb->dev, ip_dev_loopback_xmit);
272	}
273
274	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
275		       ip_finish_output);
276}
277
278int ip_output(struct sk_buff *skb)
279{
280	struct net_device *dev = skb->dst->dev;
281
282	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
283
284	skb->dev = dev;
285	skb->protocol = htons(ETH_P_IP);
286
287	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
288		       ip_finish_output);
289}
290
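/*
 * Queue a locally generated, socket-owned packet for output: route it
 * (unless the caller, e.g. SCTP, has attached a route already), build
 * the IP header in front of the transport header and pass the result to
 * dst_output() through the LOCAL_OUT netfilter hook.  TCP is the main
 * caller of this function.
 */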
291int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
292{
293	struct sock *sk = skb->sk;
294	struct inet_sock *inet = inet_sk(sk);
295	struct ip_options *opt = inet->opt;
296	struct rtable *rt;
297	struct iphdr *iph;
298
299	/* Skip all of this if the packet is already routed,
300	 * e.g. by something like SCTP.
301	 */
302	rt = (struct rtable *) skb->dst;
303	if (rt != NULL)
304		goto packet_routed;
305
306	/* Make sure we can route this packet. */
307	rt = (struct rtable *)__sk_dst_check(sk, 0);
308	if (rt == NULL) {
309		u32 daddr;
310
311		/* Use correct destination address if we have options. */
312		daddr = inet->daddr;
313		if(opt && opt->srr)
314			daddr = opt->faddr;
315
316		{
317			struct flowi fl = { .oif = sk->sk_bound_dev_if,
318					    .nl_u = { .ip4_u =
319						      { .daddr = daddr,
320							.saddr = inet->saddr,
321							.tos = RT_CONN_FLAGS(sk) } },
322					    .proto = sk->sk_protocol,
323					    .uli_u = { .ports =
324						       { .sport = inet->sport,
325							 .dport = inet->dport } } };
326
327			/* If this fails, the retransmit mechanism of the transport
328			 * layer will keep trying until a route appears or the
329			 * connection times out.
330			 */
331			if (ip_route_output_flow(&rt, &fl, sk, 0))
332				goto no_route;
333		}
334		sk_setup_caps(sk, &rt->u.dst);
335	}
336	skb->dst = dst_clone(&rt->u.dst);
337
338packet_routed:
339	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
340		goto no_route;
341
342	/* OK, we know where to send it, allocate and build IP header. */
343	iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
344	*((__u16 *)iph)	= htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
345	iph->tot_len = htons(skb->len);
346	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
347		iph->frag_off = htons(IP_DF);
348	else
349		iph->frag_off = 0;
350	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
351	iph->protocol = sk->sk_protocol;
352	iph->saddr    = rt->rt_src;
353	iph->daddr    = rt->rt_dst;
354	skb->nh.iph   = iph;
355	/* The transport layer sets skb->h.foo itself. */
356
357	if (opt && opt->optlen) {
358		iph->ihl += opt->optlen >> 2;
359		ip_options_build(skb, opt, inet->daddr, rt, 0);
360	}
361
362	ip_select_ident_more(iph, &rt->u.dst, sk,
363			     (skb_shinfo(skb)->tso_segs ?: 1) - 1);
364
365	/* Add an IP checksum. */
366	ip_send_check(iph);
367
368	skb->priority = sk->sk_priority;
369
370	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
371		       dst_output);
372
373no_route:
374	IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
375	kfree_skb(skb);
376	return -EHOSTUNREACH;
377}
378
379
380static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
381{
382	to->pkt_type = from->pkt_type;
383	to->priority = from->priority;
384	to->protocol = from->protocol;
385	dst_release(to->dst);
386	to->dst = dst_clone(from->dst);
387	to->dev = from->dev;
388
389	/* Copy the flags to each fragment. */
390	IPCB(to)->flags = IPCB(from)->flags;
391
392#ifdef CONFIG_NET_SCHED
393	to->tc_index = from->tc_index;
394#endif
395#ifdef CONFIG_NETFILTER
396	to->nfmark = from->nfmark;
397	/* Connection association is same as pre-frag packet */
398	nf_conntrack_put(to->nfct);
399	to->nfct = from->nfct;
400	nf_conntrack_get(to->nfct);
401	to->nfctinfo = from->nfctinfo;
402#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
403	to->ipvs_property = from->ipvs_property;
404#endif
405#ifdef CONFIG_BRIDGE_NETFILTER
406	nf_bridge_put(to->nf_bridge);
407	to->nf_bridge = from->nf_bridge;
408	nf_bridge_get(to->nf_bridge);
409#endif
410#endif
411}
412
413/*
414 *	This IP datagram is too large to be sent in one piece.  Break it up into
415 *	smaller pieces (each one consisting of an IP header plus a block of
416 *	the original datagram's data) so that each piece fits into a single
417 *	device frame, and queue those frames for sending.
418 */
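/*
 * A rough worked example, assuming a 1500-byte MTU and a plain 20-byte
 * header: a 4000-byte datagram carries 3980 bytes of payload, and each
 * fragment may carry at most 1480 payload bytes (1480 is already a
 * multiple of 8).  The datagram therefore becomes fragments of 1500,
 * 1500 and 1040 bytes with fragment offsets of 0, 185 and 370 (the
 * offset field counts 8-byte units), and only the last fragment has the
 * MF bit clear.
 */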
419
420static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
421{
422	struct iphdr *iph;
423	int raw = 0;
424	int ptr;
425	struct net_device *dev;
426	struct sk_buff *skb2;
427	unsigned int mtu, hlen, left, len, ll_rs;
428	int offset;
429	__be16 not_last_frag;
430	struct rtable *rt = (struct rtable*)skb->dst;
431	int err = 0;
432
433	dev = rt->u.dst.dev;
434
435	/*
436	 *	Point into the IP datagram header.
437	 */
438
439	iph = skb->nh.iph;
440
441	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
442		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
443			  htonl(dst_mtu(&rt->u.dst)));
444		kfree_skb(skb);
445		return -EMSGSIZE;
446	}
447
448	/*
449	 *	Setup starting values.
450	 */
451
452	hlen = iph->ihl * 4;
453	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
454	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
455
456	/* When frag_list is given, use it. First, check its validity:
457	 * some transformers could create a wrong frag_list or break an
458	 * existing one; that is not prohibited. In that case fall back to copying.
459	 *
460	 * LATER: this step could be merged into the real generation of fragments;
461	 * we could switch to copying when we see the first bad fragment.
462	 */
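	/* Fragment offsets are expressed in units of 8 bytes, so every
	 * fragment except the last must carry a multiple of 8 bytes of
	 * payload; that is why the geometry checks below insist on
	 * 8-byte-aligned lengths.
	 */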
463	if (skb_shinfo(skb)->frag_list) {
464		struct sk_buff *frag;
465		int first_len = skb_pagelen(skb);
466
467		if (first_len - hlen > mtu ||
468		    ((first_len - hlen) & 7) ||
469		    (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
470		    skb_cloned(skb))
471			goto slow_path;
472
473		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
474			/* Correct geometry. */
475			if (frag->len > mtu ||
476			    ((frag->len & 7) && frag->next) ||
477			    skb_headroom(frag) < hlen)
478			    goto slow_path;
479
480			/* Partially cloned skb? */
481			if (skb_shared(frag))
482				goto slow_path;
483
484			BUG_ON(frag->sk);
485			if (skb->sk) {
486				sock_hold(skb->sk);
487				frag->sk = skb->sk;
488				frag->destructor = sock_wfree;
489				skb->truesize -= frag->truesize;
490			}
491		}
492
493		/* Everything is OK. Generate! */
494
495		err = 0;
496		offset = 0;
497		frag = skb_shinfo(skb)->frag_list;
498		skb_shinfo(skb)->frag_list = NULL;
499		skb->data_len = first_len - skb_headlen(skb);
500		skb->len = first_len;
501		iph->tot_len = htons(first_len);
502		iph->frag_off = htons(IP_MF);
503		ip_send_check(iph);
504
505		for (;;) {
506			/* Prepare the header of the next frame
507			 * before the previous one goes out. */
508			if (frag) {
509				frag->ip_summed = CHECKSUM_NONE;
510				frag->h.raw = frag->data;
511				frag->nh.raw = __skb_push(frag, hlen);
512				memcpy(frag->nh.raw, iph, hlen);
513				iph = frag->nh.iph;
514				iph->tot_len = htons(frag->len);
515				ip_copy_metadata(frag, skb);
516				if (offset == 0)
517					ip_options_fragment(frag);
518				offset += skb->len - hlen;
519				iph->frag_off = htons(offset>>3);
520				if (frag->next != NULL)
521					iph->frag_off |= htons(IP_MF);
522				/* Ready, complete checksum */
523				ip_send_check(iph);
524			}
525
526			err = output(skb);
527
528			if (err || !frag)
529				break;
530
531			skb = frag;
532			frag = skb->next;
533			skb->next = NULL;
534		}
535
536		if (err == 0) {
537			IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
538			return 0;
539		}
540
541		while (frag) {
542			skb = frag->next;
543			kfree_skb(frag);
544			frag = skb;
545		}
546		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
547		return err;
548	}
549
550slow_path:
551	left = skb->len - hlen;		/* Space per frame */
552	ptr = raw + hlen;		/* Where to start from */
553
554#ifdef CONFIG_BRIDGE_NETFILTER
555	/* For bridged IP traffic encapsulated inside e.g. a VLAN header,
556	 * we need to make room for the encapsulating header. */
557	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb));
558	mtu -= nf_bridge_pad(skb);
559#else
560	ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev);
561#endif
562	/*
563	 *	Fragment the datagram.
564	 */
565
566	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
567	not_last_frag = iph->frag_off & htons(IP_MF);
568
569	/*
570	 *	Keep copying data until we run out.
571	 */
572
573	while(left > 0)	{
574		len = left;
575		/* IF: it doesn't fit, use 'mtu' - the data space left */
576		if (len > mtu)
577			len = mtu;
578		/* IF: we are not sending up to and including the packet end
579		   then align the next start on an eight byte boundary */
580		if (len < left)	{
581			len &= ~7;
582		}
583		/*
584		 *	Allocate buffer.
585		 */
586
587		if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
588			NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
589			err = -ENOMEM;
590			goto fail;
591		}
592
593		/*
594		 *	Set up data on packet
595		 */
596
597		ip_copy_metadata(skb2, skb);
598		skb_reserve(skb2, ll_rs);
599		skb_put(skb2, len + hlen);
600		skb2->nh.raw = skb2->data;
601		skb2->h.raw = skb2->data + hlen;
602
603		/*
604		 *	Charge the memory for the fragment to any owner
605		 *	it might possess
606		 */
607
608		if (skb->sk)
609			skb_set_owner_w(skb2, skb->sk);
610
611		/*
612		 *	Copy the packet header into the new buffer.
613		 */
614
615		memcpy(skb2->nh.raw, skb->data, hlen);
616
617		/*
618		 *	Copy a block of the IP datagram.
619		 */
620		if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
621			BUG();
622		left -= len;
623
624		/*
625		 *	Fill in the new header fields.
626		 */
627		iph = skb2->nh.iph;
628		iph->frag_off = htons((offset >> 3));
629
630		/* ANK: dirty, but effective trick. Upgrade options only if
631		 * the segment to be fragmented was THE FIRST (otherwise,
632		 * options are already fixed) and do it ONCE
633		 * on the initial skb, so that all the following fragments
634		 * will inherit fixed options.
635		 */
636		if (offset == 0)
637			ip_options_fragment(skb);
638
639		/*
640		 *	Added AC : If we are fragmenting a fragment that's not the
641		 *		   last fragment then keep the MF bit set on each fragment
642		 */
643		if (left > 0 || not_last_frag)
644			iph->frag_off |= htons(IP_MF);
645		ptr += len;
646		offset += len;
647
648		/*
649		 *	Put this fragment into the sending queue.
650		 */
651
652		IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
653
654		iph->tot_len = htons(len + hlen);
655
656		ip_send_check(iph);
657
658		err = output(skb2);
659		if (err)
660			goto fail;
661	}
662	kfree_skb(skb);
663	IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
664	return err;
665
666fail:
667	kfree_skb(skb);
668	IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
669	return err;
670}
671
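/*
 * Generic getfrag callback for ip_append_data(): copy @len bytes of the
 * user iovec passed in @from, starting @offset bytes into it, to the
 * kernel buffer @to.  When the hardware cannot checksum the packet, the
 * copied data is also folded into skb->csum; @odd is the offset at which
 * the data will sit inside the packet, which csum_block_add() needs to
 * keep the 16-bit one's-complement sum correctly aligned.
 */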
672int
673ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
674{
675	struct iovec *iov = from;
676
677	if (skb->ip_summed == CHECKSUM_HW) {
678		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
679			return -EFAULT;
680	} else {
681		unsigned int csum = 0;
682		if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
683			return -EFAULT;
684		skb->csum = csum_block_add(skb->csum, csum, odd);
685	}
686	return 0;
687}
688
689static inline unsigned int
690csum_page(struct page *page, int offset, int copy)
691{
692	char *kaddr;
693	unsigned int csum;
694	kaddr = kmap(page);
695	csum = csum_partial(kaddr + offset, copy, 0);
696	kunmap(page);
697	return csum;
698}
699
700static inline int ip_ufo_append_data(struct sock *sk,
701			int getfrag(void *from, char *to, int offset, int len,
702			       int odd, struct sk_buff *skb),
703			void *from, int length, int hh_len, int fragheaderlen,
704			int transhdrlen, int mtu,unsigned int flags)
705{
706	struct sk_buff *skb;
707	int err;
708
709	/* The network device supports UDP fragmentation offload,
710	 * so create one single skb containing the complete
711	 * UDP datagram.
712	 */
713	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
714		skb = sock_alloc_send_skb(sk,
715			hh_len + fragheaderlen + transhdrlen + 20,
716			(flags & MSG_DONTWAIT), &err);
717
718		if (skb == NULL)
719			return err;
720
721		/* reserve space for Hardware header */
722		skb_reserve(skb, hh_len);
723
724		/* create space for UDP/IP header */
725		skb_put(skb,fragheaderlen + transhdrlen);
726
727		/* initialize network header pointer */
728		skb->nh.raw = skb->data;
729
730		/* initialize protocol header pointer */
731		skb->h.raw = skb->data + fragheaderlen;
732
733		skb->ip_summed = CHECKSUM_HW;
734		skb->csum = 0;
735		sk->sk_sndmsg_off = 0;
736	}
737
738	err = skb_append_datato_frags(sk,skb, getfrag, from,
739			       (length - transhdrlen));
740	if (!err) {
741		/* specify the length of each IP datagram fragment*/
742		skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
743		__skb_queue_tail(&sk->sk_write_queue, skb);
744
745		return 0;
746	}
747	/* There is not enough support to do UFO,
748	 * so follow the normal path.
749	 */
750	kfree_skb(skb);
751	return err;
752}
753
754/*
755 *	ip_append_data() and ip_append_page() can make one large IP datagram
756 *	from many pieces of data. Each piece will be held on the socket
757 *	until ip_push_pending_frames() is called. Each piece can be a page
758 *	or non-page data.
759 *
760 *	Not only UDP but also other transport protocols - e.g. raw sockets -
761 *	can potentially use this interface.
762 *
763 *	LATER: length must be adjusted by pad at tail, when it is required.
764 */
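/*
 *	A rough sketch of how a datagram transport drives this interface,
 *	modelled on UDP (msg, ipc and rt are the caller's message, control
 *	cookie and route; locking, corking and construction of the
 *	transport header before the final push are omitted):
 *
 *		err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov,
 *				     len, sizeof(struct udphdr), &ipc, rt,
 *				     msg->msg_flags);
 *		if (err)
 *			ip_flush_pending_frames(sk);
 *		else if (!(msg->msg_flags & MSG_MORE))
 *			err = ip_push_pending_frames(sk);
 */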
765int ip_append_data(struct sock *sk,
766		   int getfrag(void *from, char *to, int offset, int len,
767			       int odd, struct sk_buff *skb),
768		   void *from, int length, int transhdrlen,
769		   struct ipcm_cookie *ipc, struct rtable *rt,
770		   unsigned int flags)
771{
772	struct inet_sock *inet = inet_sk(sk);
773	struct sk_buff *skb;
774
775	struct ip_options *opt = NULL;
776	int hh_len;
777	int exthdrlen;
778	int mtu;
779	int copy;
780	int err;
781	int offset = 0;
782	unsigned int maxfraglen, fragheaderlen;
783	int csummode = CHECKSUM_NONE;
784
785	if (flags&MSG_PROBE)
786		return 0;
787
788	if (skb_queue_empty(&sk->sk_write_queue)) {
789		/*
790		 * setup for corking.
791		 */
792		opt = ipc->opt;
793		if (opt) {
794			if (inet->cork.opt == NULL) {
795				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
796				if (unlikely(inet->cork.opt == NULL))
797					return -ENOBUFS;
798			}
799			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
800			inet->cork.flags |= IPCORK_OPT;
801			inet->cork.addr = ipc->addr;
802		}
803		dst_hold(&rt->u.dst);
804		inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
805		inet->cork.rt = rt;
806		inet->cork.length = 0;
807		sk->sk_sndmsg_page = NULL;
808		sk->sk_sndmsg_off = 0;
809		if ((exthdrlen = rt->u.dst.header_len) != 0) {
810			length += exthdrlen;
811			transhdrlen += exthdrlen;
812		}
813	} else {
814		rt = inet->cork.rt;
815		if (inet->cork.flags & IPCORK_OPT)
816			opt = inet->cork.opt;
817
818		transhdrlen = 0;
819		exthdrlen = 0;
820		mtu = inet->cork.fragsize;
821	}
822	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
823
824	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
825	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
826
827	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
828		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
829		return -EMSGSIZE;
830	}
831
832	/*
833	 * transhdrlen > 0 means that this is the first fragment and we wish
834	 * it not to be fragmented in the future.
835	 */
836	if (transhdrlen &&
837	    length + fragheaderlen <= mtu &&
838	    rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) &&
839	    !exthdrlen)
840		csummode = CHECKSUM_HW;
841
842	inet->cork.length += length;
843	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
844			(rt->u.dst.dev->features & NETIF_F_UFO)) {
845
846		if(ip_ufo_append_data(sk, getfrag, from, length, hh_len,
847			       fragheaderlen, transhdrlen, mtu, flags))
848			goto error;
849
850		return 0;
851	}
852
853	/* So, what's going on in the loop below?
854	 *
855	 * We use the calculated fragment length to generate a chain of skbs;
856	 * each segment is an IP fragment ready to be sent to the network
857	 * once the appropriate IP header has been added.
858	 */
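	/* For example, assuming a 1500-byte MTU and a plain 20-byte header,
	 * fragheaderlen is 20 and maxfraglen is ((1500 - 20) & ~7) + 20 =
	 * 1500, so every fragment except possibly the last carries 1480
	 * bytes of data.
	 */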
859
860	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
861		goto alloc_new_skb;
862
863	while (length > 0) {
864		/* Check if the remaining data fits into current packet. */
865		copy = mtu - skb->len;
866		if (copy < length)
867			copy = maxfraglen - skb->len;
868		if (copy <= 0) {
869			char *data;
870			unsigned int datalen;
871			unsigned int fraglen;
872			unsigned int fraggap;
873			unsigned int alloclen;
874			struct sk_buff *skb_prev;
875alloc_new_skb:
876			skb_prev = skb;
877			if (skb_prev)
878				fraggap = skb_prev->len - maxfraglen;
879			else
880				fraggap = 0;
881
882			/*
883			 * If remaining data exceeds the mtu,
884			 * we know we need more fragment(s).
885			 */
886			datalen = length + fraggap;
887			if (datalen > mtu - fragheaderlen)
888				datalen = maxfraglen - fragheaderlen;
889			fraglen = datalen + fragheaderlen;
890
891			if ((flags & MSG_MORE) &&
892			    !(rt->u.dst.dev->features&NETIF_F_SG))
893				alloclen = mtu;
894			else
895				alloclen = datalen + fragheaderlen;
896
897			/* The last fragment gets additional space at tail.
898			 * Note that with MSG_MORE we overallocate on fragments,
899			 * because we have no idea which fragment will be
900			 * the last.
901			 */
902			if (datalen == length)
903				alloclen += rt->u.dst.trailer_len;
904
905			if (transhdrlen) {
906				skb = sock_alloc_send_skb(sk,
907						alloclen + hh_len + 15,
908						(flags & MSG_DONTWAIT), &err);
909			} else {
910				skb = NULL;
911				if (atomic_read(&sk->sk_wmem_alloc) <=
912				    2 * sk->sk_sndbuf)
913					skb = sock_wmalloc(sk,
914							   alloclen + hh_len + 15, 1,
915							   sk->sk_allocation);
916				if (unlikely(skb == NULL))
917					err = -ENOBUFS;
918			}
919			if (skb == NULL)
920				goto error;
921
922			/*
923			 *	Fill in the control structures
924			 */
925			skb->ip_summed = csummode;
926			skb->csum = 0;
927			skb_reserve(skb, hh_len);
928
929			/*
930			 *	Find where to start putting bytes.
931			 */
932			data = skb_put(skb, fraglen);
933			skb->nh.raw = data + exthdrlen;
934			data += fragheaderlen;
935			skb->h.raw = data + exthdrlen;
936
937			if (fraggap) {
938				skb->csum = skb_copy_and_csum_bits(
939					skb_prev, maxfraglen,
940					data + transhdrlen, fraggap, 0);
941				skb_prev->csum = csum_sub(skb_prev->csum,
942							  skb->csum);
943				data += fraggap;
944				skb_trim(skb_prev, maxfraglen);
945			}
946
947			copy = datalen - transhdrlen - fraggap;
948			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
949				err = -EFAULT;
950				kfree_skb(skb);
951				goto error;
952			}
953
954			offset += copy;
955			length -= datalen - fraggap;
956			transhdrlen = 0;
957			exthdrlen = 0;
958			csummode = CHECKSUM_NONE;
959
960			/*
961			 * Put the packet on the pending queue.
962			 */
963			__skb_queue_tail(&sk->sk_write_queue, skb);
964			continue;
965		}
966
967		if (copy > length)
968			copy = length;
969
970		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
971			unsigned int off;
972
973			off = skb->len;
974			if (getfrag(from, skb_put(skb, copy),
975					offset, copy, off, skb) < 0) {
976				__skb_trim(skb, off);
977				err = -EFAULT;
978				goto error;
979			}
980		} else {
981			int i = skb_shinfo(skb)->nr_frags;
982			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
983			struct page *page = sk->sk_sndmsg_page;
984			int off = sk->sk_sndmsg_off;
985			unsigned int left;
986
987			if (page && (left = PAGE_SIZE - off) > 0) {
988				if (copy >= left)
989					copy = left;
990				if (page != frag->page) {
991					if (i == MAX_SKB_FRAGS) {
992						err = -EMSGSIZE;
993						goto error;
994					}
995					get_page(page);
996	 				skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
997					frag = &skb_shinfo(skb)->frags[i];
998				}
999			} else if (i < MAX_SKB_FRAGS) {
1000				if (copy > PAGE_SIZE)
1001					copy = PAGE_SIZE;
1002				page = alloc_pages(sk->sk_allocation, 0);
1003				if (page == NULL)  {
1004					err = -ENOMEM;
1005					goto error;
1006				}
1007				sk->sk_sndmsg_page = page;
1008				sk->sk_sndmsg_off = 0;
1009
1010				skb_fill_page_desc(skb, i, page, 0, 0);
1011				frag = &skb_shinfo(skb)->frags[i];
1012				skb->truesize += PAGE_SIZE;
1013				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1014			} else {
1015				err = -EMSGSIZE;
1016				goto error;
1017			}
1018			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1019				err = -EFAULT;
1020				goto error;
1021			}
1022			sk->sk_sndmsg_off += copy;
1023			frag->size += copy;
1024			skb->len += copy;
1025			skb->data_len += copy;
1026		}
1027		offset += copy;
1028		length -= copy;
1029	}
1030
1031	return 0;
1032
1033error:
1034	inet->cork.length -= length;
1035	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1036	return err;
1037}
1038
1039ssize_t	ip_append_page(struct sock *sk, struct page *page,
1040		       int offset, size_t size, int flags)
1041{
1042	struct inet_sock *inet = inet_sk(sk);
1043	struct sk_buff *skb;
1044	struct rtable *rt;
1045	struct ip_options *opt = NULL;
1046	int hh_len;
1047	int mtu;
1048	int len;
1049	int err;
1050	unsigned int maxfraglen, fragheaderlen, fraggap;
1051
1052	if (inet->hdrincl)
1053		return -EPERM;
1054
1055	if (flags&MSG_PROBE)
1056		return 0;
1057
1058	if (skb_queue_empty(&sk->sk_write_queue))
1059		return -EINVAL;
1060
1061	rt = inet->cork.rt;
1062	if (inet->cork.flags & IPCORK_OPT)
1063		opt = inet->cork.opt;
1064
1065	if (!(rt->u.dst.dev->features&NETIF_F_SG))
1066		return -EOPNOTSUPP;
1067
1068	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1069	mtu = inet->cork.fragsize;
1070
1071	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
1072	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
1073
1074	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
1075		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
1076		return -EMSGSIZE;
1077	}
1078
1079	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1080		return -EINVAL;
1081
1082	inet->cork.length += size;
1083	if ((sk->sk_protocol == IPPROTO_UDP) &&
1084	    (rt->u.dst.dev->features & NETIF_F_UFO))
1085		skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
1086
1087
1088	while (size > 0) {
1089		int i;
1090
1091		if (skb_shinfo(skb)->ufo_size)
1092			len = size;
1093		else {
1094
1095			/* Check if the remaining data fits into current packet. */
1096			len = mtu - skb->len;
1097			if (len < size)
1098				len = maxfraglen - skb->len;
1099		}
1100		if (len <= 0) {
1101			struct sk_buff *skb_prev;
1102			char *data;
1103			struct iphdr *iph;
1104			int alloclen;
1105
1106			skb_prev = skb;
1107			fraggap = skb_prev->len - maxfraglen;
1108
1109			alloclen = fragheaderlen + hh_len + fraggap + 15;
1110			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
1111			if (unlikely(!skb)) {
1112				err = -ENOBUFS;
1113				goto error;
1114			}
1115
1116			/*
1117			 *	Fill in the control structures
1118			 */
1119			skb->ip_summed = CHECKSUM_NONE;
1120			skb->csum = 0;
1121			skb_reserve(skb, hh_len);
1122
1123			/*
1124			 *	Find where to start putting bytes.
1125			 */
1126			data = skb_put(skb, fragheaderlen + fraggap);
1127			skb->nh.iph = iph = (struct iphdr *)data;
1128			data += fragheaderlen;
1129			skb->h.raw = data;
1130
1131			if (fraggap) {
1132				skb->csum = skb_copy_and_csum_bits(
1133					skb_prev, maxfraglen,
1134					data, fraggap, 0);
1135				skb_prev->csum = csum_sub(skb_prev->csum,
1136							  skb->csum);
1137				skb_trim(skb_prev, maxfraglen);
1138			}
1139
1140			/*
1141			 * Put the packet on the pending queue.
1142			 */
1143			__skb_queue_tail(&sk->sk_write_queue, skb);
1144			continue;
1145		}
1146
1147		i = skb_shinfo(skb)->nr_frags;
1148		if (len > size)
1149			len = size;
1150		if (skb_can_coalesce(skb, i, page, offset)) {
1151			skb_shinfo(skb)->frags[i-1].size += len;
1152		} else if (i < MAX_SKB_FRAGS) {
1153			get_page(page);
1154			skb_fill_page_desc(skb, i, page, offset, len);
1155		} else {
1156			err = -EMSGSIZE;
1157			goto error;
1158		}
1159
1160		if (skb->ip_summed == CHECKSUM_NONE) {
1161			unsigned int csum;
1162			csum = csum_page(page, offset, len);
1163			skb->csum = csum_block_add(skb->csum, csum, skb->len);
1164		}
1165
1166		skb->len += len;
1167		skb->data_len += len;
1168		offset += len;
1169		size -= len;
1170	}
1171	return 0;
1172
1173error:
1174	inet->cork.length -= size;
1175	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1176	return err;
1177}
1178
1179/*
1180 *	Combine all pending IP fragments on the socket into one IP datagram
1181 *	and push them out.
1182 */
1183int ip_push_pending_frames(struct sock *sk)
1184{
1185	struct sk_buff *skb, *tmp_skb;
1186	struct sk_buff **tail_skb;
1187	struct inet_sock *inet = inet_sk(sk);
1188	struct ip_options *opt = NULL;
1189	struct rtable *rt = inet->cork.rt;
1190	struct iphdr *iph;
1191	__be16 df = 0;
1192	__u8 ttl;
1193	int err = 0;
1194
1195	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1196		goto out;
1197	tail_skb = &(skb_shinfo(skb)->frag_list);
1198
1199	/* move skb->data to ip header from ext header */
1200	if (skb->data < skb->nh.raw)
1201		__skb_pull(skb, skb->nh.raw - skb->data);
1202	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1203		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1204		*tail_skb = tmp_skb;
1205		tail_skb = &(tmp_skb->next);
1206		skb->len += tmp_skb->len;
1207		skb->data_len += tmp_skb->len;
1208		skb->truesize += tmp_skb->truesize;
1209		__sock_put(tmp_skb->sk);
1210		tmp_skb->destructor = NULL;
1211		tmp_skb->sk = NULL;
1212	}
1213
1214	/* Unless the user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow
1215	 * the frame generated here to be fragmented. No matter how transforms
1216	 * change the size of the packet, it will go out.
1217	 */
1218	if (inet->pmtudisc != IP_PMTUDISC_DO)
1219		skb->local_df = 1;
1220
1221	/* DF bit is set when we want to see DF on outgoing frames.
1222	 * If local_df is set too, we still allow this frame to be fragmented
1223	 * locally. */
1224	if (inet->pmtudisc == IP_PMTUDISC_DO ||
1225	    (skb->len <= dst_mtu(&rt->u.dst) &&
1226	     ip_dont_fragment(sk, &rt->u.dst)))
1227		df = htons(IP_DF);
1228
1229	if (inet->cork.flags & IPCORK_OPT)
1230		opt = inet->cork.opt;
1231
1232	if (rt->rt_type == RTN_MULTICAST)
1233		ttl = inet->mc_ttl;
1234	else
1235		ttl = ip_select_ttl(inet, &rt->u.dst);
1236
1237	iph = (struct iphdr *)skb->data;
1238	iph->version = 4;
1239	iph->ihl = 5;
1240	if (opt) {
1241		iph->ihl += opt->optlen>>2;
1242		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
1243	}
1244	iph->tos = inet->tos;
1245	iph->tot_len = htons(skb->len);
1246	iph->frag_off = df;
1247	if (!df) {
1248		__ip_select_ident(iph, &rt->u.dst, 0);
1249	} else {
1250		iph->id = htons(inet->id++);
1251	}
1252	iph->ttl = ttl;
1253	iph->protocol = sk->sk_protocol;
1254	iph->saddr = rt->rt_src;
1255	iph->daddr = rt->rt_dst;
1256	ip_send_check(iph);
1257
1258	skb->priority = sk->sk_priority;
1259	skb->dst = dst_clone(&rt->u.dst);
1260
1261	/* Netfilter gets the whole, not yet fragmented, skb. */
1262	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
1263		      skb->dst->dev, dst_output);
1264	if (err) {
1265		if (err > 0)
1266			err = inet->recverr ? net_xmit_errno(err) : 0;
1267		if (err)
1268			goto error;
1269	}
1270
1271out:
1272	inet->cork.flags &= ~IPCORK_OPT;
1273	kfree(inet->cork.opt);
1274	inet->cork.opt = NULL;
1275	if (inet->cork.rt) {
1276		ip_rt_put(inet->cork.rt);
1277		inet->cork.rt = NULL;
1278	}
1279	return err;
1280
1281error:
1282	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1283	goto out;
1284}
1285
1286/*
1287 *	Throw away all pending data on the socket.
1288 */
1289void ip_flush_pending_frames(struct sock *sk)
1290{
1291	struct inet_sock *inet = inet_sk(sk);
1292	struct sk_buff *skb;
1293
1294	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
1295		kfree_skb(skb);
1296
1297	inet->cork.flags &= ~IPCORK_OPT;
1298	kfree(inet->cork.opt);
1299	inet->cork.opt = NULL;
1300	if (inet->cork.rt) {
1301		ip_rt_put(inet->cork.rt);
1302		inet->cork.rt = NULL;
1303	}
1304}
1305
1306
1307/*
1308 *	Fetch data from kernel space and fill in checksum if needed.
1309 */
1310static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1311			      int len, int odd, struct sk_buff *skb)
1312{
1313	unsigned int csum;
1314
1315	csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
1316	skb->csum = csum_block_add(skb->csum, csum, odd);
1317	return 0;
1318}
1319
1320/*
1321 *	Generic function to send a packet as a reply to another packet.
1322 *	Used to send TCP resets so far. ICMP should use this function too.
1323 *
1324 *	Should run single threaded per socket because it uses the sock
1325 *     	structure to pass arguments.
1326 *
1327 *	LATER: switch from ip_build_xmit to ip_append_*
1328 */
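/*
 *	A rough sketch of a caller, modelled on how TCP sends a RST
 *	(rep_th and ctl_sk are placeholders; error handling is omitted):
 *
 *		struct ip_reply_arg arg;
 *
 *		memset(&arg, 0, sizeof(arg));
 *		arg.iov[0].iov_base = &rep_th;
 *		arg.iov[0].iov_len  = sizeof(rep_th);
 *		arg.csum = csum_tcpudp_nofold(saddr, daddr,
 *					      arg.iov[0].iov_len,
 *					      IPPROTO_TCP, 0);
 *		arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 *		ip_send_reply(ctl_sk, skb, &arg, arg.iov[0].iov_len);
 */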
1329void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
1330		   unsigned int len)
1331{
1332	struct inet_sock *inet = inet_sk(sk);
1333	struct {
1334		struct ip_options	opt;
1335		char			data[40];
1336	} replyopts;
1337	struct ipcm_cookie ipc;
1338	u32 daddr;
1339	struct rtable *rt = (struct rtable*)skb->dst;
1340
1341	if (ip_options_echo(&replyopts.opt, skb))
1342		return;
1343
1344	daddr = ipc.addr = rt->rt_src;
1345	ipc.opt = NULL;
1346
1347	if (replyopts.opt.optlen) {
1348		ipc.opt = &replyopts.opt;
1349
1350		if (ipc.opt->srr)
1351			daddr = replyopts.opt.faddr;
1352	}
1353
1354	{
1355		struct flowi fl = { .nl_u = { .ip4_u =
1356					      { .daddr = daddr,
1357						.saddr = rt->rt_spec_dst,
1358						.tos = RT_TOS(skb->nh.iph->tos) } },
1359				    /* Not quite clean, but right. */
1360				    .uli_u = { .ports =
1361					       { .sport = skb->h.th->dest,
1362					         .dport = skb->h.th->source } },
1363				    .proto = sk->sk_protocol };
1364		if (ip_route_output_key(&rt, &fl))
1365			return;
1366	}
1367
1368	/* And let IP do all the hard work.
1369
1370	   This chunk is not reentrant, hence the spinlock.
1371	   Note that it relies on the fact that this function is called
1372	   with BHs locally disabled and that sk cannot already be spinlocked.
1373	 */
1374	bh_lock_sock(sk);
1375	inet->tos = skb->nh.iph->tos;
1376	sk->sk_priority = skb->priority;
1377	sk->sk_protocol = skb->nh.iph->protocol;
1378	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1379		       &ipc, rt, MSG_DONTWAIT);
1380	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1381		if (arg->csumoffset >= 0)
1382			*((u16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
1383		skb->ip_summed = CHECKSUM_NONE;
1384		ip_push_pending_frames(sk);
1385	}
1386
1387	bh_unlock_sock(sk);
1388
1389	ip_rt_put(rt);
1390}
1391
1392void __init ip_init(void)
1393{
1394	ip_rt_init();
1395	inet_initpeers();
1396
1397#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
1398	igmp_mc_proc_init();
1399#endif
1400}
1401
1402EXPORT_SYMBOL(ip_generic_getfrag);
1403EXPORT_SYMBOL(ip_queue_xmit);
1404EXPORT_SYMBOL(ip_send_check);
1405