ip_output.c revision 429f08e950a88cd826b203ea898c2f2d0f7db9de
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		The Internet Protocol (IP) output module.
7 *
8 * Version:	$Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $
9 *
10 * Authors:	Ross Biro
11 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *		Donald Becker, <becker@super.org>
13 *		Alan Cox, <Alan.Cox@linux.org>
14 *		Richard Underwood
15 *		Stefan Becker, <stefanb@yello.ping.de>
16 *		Jorge Cwik, <jorge@laser.satlink.net>
17 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
18 *		Hirokazu Takahashi, <taka@valinux.co.jp>
19 *
20 *	See ip_input.c for original log
21 *
22 *	Fixes:
23 *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
24 *		Mike Kilburn	:	htons() missing in ip_build_xmit.
25 *		Bradford Johnson:	Fix faulty handling of some frames when
26 *					no route is found.
27 *		Alexander Demenshin:	Missing sk/skb free in ip_queue_xmit
28 *					(in case if packet not accepted by
29 *					output firewall rules)
30 *		Mike McLagan	:	Routing by source
31 *		Alexey Kuznetsov:	use new route cache
32 *		Andi Kleen:		Fix broken PMTU recovery and remove
33 *					some redundant tests.
34 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
35 *		Andi Kleen	: 	Replace ip_reply with ip_send_reply.
36 *		Andi Kleen	:	Split fast and slow ip_build_xmit path
37 *					for decreased register pressure on x86
38 *					and more readability.
39 *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
40 *					silently drop skb instead of failing with -EPERM.
41 *		Detlev Wengorz	:	Copy protocol for fragments.
42 *		Hirokazu Takahashi:	HW checksumming for outgoing UDP
43 *					datagrams.
44 *		Hirokazu Takahashi:	sendfile() on UDP works now.
45 */
46
47#include <asm/uaccess.h>
48#include <asm/system.h>
49#include <linux/module.h>
50#include <linux/types.h>
51#include <linux/kernel.h>
52#include <linux/mm.h>
53#include <linux/string.h>
54#include <linux/errno.h>
55#include <linux/highmem.h>
56
57#include <linux/socket.h>
58#include <linux/sockios.h>
59#include <linux/in.h>
60#include <linux/inet.h>
61#include <linux/netdevice.h>
62#include <linux/etherdevice.h>
63#include <linux/proc_fs.h>
64#include <linux/stat.h>
65#include <linux/init.h>
66
67#include <net/snmp.h>
68#include <net/ip.h>
69#include <net/protocol.h>
70#include <net/route.h>
71#include <net/xfrm.h>
72#include <linux/skbuff.h>
73#include <net/sock.h>
74#include <net/arp.h>
75#include <net/icmp.h>
76#include <net/checksum.h>
77#include <net/inetpeer.h>
78#include <linux/igmp.h>
79#include <linux/netfilter_ipv4.h>
80#include <linux/netfilter_bridge.h>
81#include <linux/mroute.h>
82#include <linux/netlink.h>
83#include <linux/tcp.h>
84
85int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
86
87/* Generate a checksum for an outgoing IP datagram. */
88__inline__ void ip_send_check(struct iphdr *iph)
89{
90	iph->check = 0;
91	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
92}
93
94/* dev_loopback_xmit for use with netfilter. */
95static int ip_dev_loopback_xmit(struct sk_buff *newskb)
96{
97	skb_reset_mac_header(newskb);
98	__skb_pull(newskb, skb_network_offset(newskb));
99	newskb->pkt_type = PACKET_LOOPBACK;
100	newskb->ip_summed = CHECKSUM_UNNECESSARY;
101	BUG_TRAP(newskb->dst);
102	netif_rx(newskb);
103	return 0;
104}
105
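/* Choose the TTL for an outgoing packet: use the socket's unicast TTL if one
 * has been set (uc_ttl >= 0), otherwise fall back to the route's hop-limit
 * metric.
 */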
106static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
107{
108	int ttl = inet->uc_ttl;
109
110	if (ttl < 0)
111		ttl = dst_metric(dst, RTAX_HOPLIMIT);
112	return ttl;
113}
114
115/*
116 *		Add an ip header to a skbuff and send it out.
117 *
118 */
119int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
120			  __be32 saddr, __be32 daddr, struct ip_options *opt)
121{
122	struct inet_sock *inet = inet_sk(sk);
123	struct rtable *rt = (struct rtable *)skb->dst;
124	struct iphdr *iph;
125
126	/* Build the IP header. */
127	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
128	skb_reset_network_header(skb);
129	iph = ip_hdr(skb);
130	iph->version  = 4;
131	iph->ihl      = 5;
132	iph->tos      = inet->tos;
133	if (ip_dont_fragment(sk, &rt->u.dst))
134		iph->frag_off = htons(IP_DF);
135	else
136		iph->frag_off = 0;
137	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
138	iph->daddr    = rt->rt_dst;
139	iph->saddr    = rt->rt_src;
140	iph->protocol = sk->sk_protocol;
141	iph->tot_len  = htons(skb->len);
142	ip_select_ident(iph, &rt->u.dst, sk);
143
144	if (opt && opt->optlen) {
145		iph->ihl += opt->optlen>>2;
146		ip_options_build(skb, opt, daddr, rt, 0);
147	}
148	ip_send_check(iph);
149
150	skb->priority = sk->sk_priority;
151
152	/* Send it out. */
153	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
154		       dst_output);
155}
156
157EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
158
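/* Last step of the output path: make sure there is enough headroom for the
 * link-layer header, then hand the skb to the neighbour layer - via the
 * cached hardware header if one exists, otherwise via the neighbour's
 * output routine.
 */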
159static inline int ip_finish_output2(struct sk_buff *skb)
160{
161	struct dst_entry *dst = skb->dst;
162	struct rtable *rt = (struct rtable *)dst;
163	struct net_device *dev = dst->dev;
164	unsigned int hh_len = LL_RESERVED_SPACE(dev);
165
166	if (rt->rt_type == RTN_MULTICAST)
167		IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
168	else if (rt->rt_type == RTN_BROADCAST)
169		IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS);
170
171	/* Be paranoid, rather than too clever. */
172	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
173		struct sk_buff *skb2;
174
175		skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
176		if (skb2 == NULL) {
177			kfree_skb(skb);
178			return -ENOMEM;
179		}
180		if (skb->sk)
181			skb_set_owner_w(skb2, skb->sk);
182		kfree_skb(skb);
183		skb = skb2;
184	}
185
186	if (dst->hh)
187		return neigh_hh_output(dst->hh, skb);
188	else if (dst->neighbour)
189		return dst->neighbour->output(skb);
190
191	if (net_ratelimit())
192		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
193	kfree_skb(skb);
194	return -EINVAL;
195}
196
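/* MTU to use for this skb: the raw device MTU when the socket does its own
 * PMTU probing (IP_PMTUDISC_PROBE), otherwise the path MTU recorded in the
 * route.
 */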
197static inline int ip_skb_dst_mtu(struct sk_buff *skb)
198{
199	struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
200
201	return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
202	       skb->dst->dev->mtu : dst_mtu(skb->dst);
203}
204
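/* Called from the POST_ROUTING hook: re-route if an xfrm policy matched
 * after SNAT, fragment oversized non-GSO packets, and finally pass the skb
 * to ip_finish_output2().
 */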
205static int ip_finish_output(struct sk_buff *skb)
206{
207#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
208	/* Policy lookup after SNAT yielded a new policy */
209	if (skb->dst->xfrm != NULL) {
210		IPCB(skb)->flags |= IPSKB_REROUTED;
211		return dst_output(skb);
212	}
213#endif
214	if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
215		return ip_fragment(skb, ip_finish_output2);
216	else
217		return ip_finish_output2(skb);
218}
219
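/* Output routine for multicast (and broadcast) packets: loop a copy back to
 * local listeners where needed, then send the original through the
 * POST_ROUTING hook.
 */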
220int ip_mc_output(struct sk_buff *skb)
221{
222	struct sock *sk = skb->sk;
223	struct rtable *rt = (struct rtable*)skb->dst;
224	struct net_device *dev = rt->u.dst.dev;
225
226	/*
227	 *	If the indicated interface is up and running, send the packet.
228	 */
229	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
230
231	skb->dev = dev;
232	skb->protocol = htons(ETH_P_IP);
233
234	/*
235	 *	Multicasts are looped back for other local users
236	 */
237
238	if (rt->rt_flags&RTCF_MULTICAST) {
239		if ((!sk || inet_sk(sk)->mc_loop)
240#ifdef CONFIG_IP_MROUTE
241		/* Small optimization: do not loop back non-local frames
242		   that came back after forwarding; they will be dropped
243		   by ip_mr_input in any case.
244		   Note that local frames are looped back so that they are
245		   delivered to local recipients.
246
247		   This check is duplicated in ip_mr_input at the moment.
248		 */
249		    && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
250#endif
251		) {
252			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
253			if (newskb)
254				NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
255					newskb->dev,
256					ip_dev_loopback_xmit);
257		}
258
259		/* Multicasts with ttl 0 must not go beyond the host */
260
261		if (ip_hdr(skb)->ttl == 0) {
262			kfree_skb(skb);
263			return 0;
264		}
265	}
266
267	if (rt->rt_flags&RTCF_BROADCAST) {
268		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
269		if (newskb)
270			NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
271				newskb->dev, ip_dev_loopback_xmit);
272	}
273
274	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
275			    ip_finish_output,
276			    !(IPCB(skb)->flags & IPSKB_REROUTED));
277}
278
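/* Standard output routine for locally generated unicast packets: run the
 * POST_ROUTING hook, then ip_finish_output().
 */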
279int ip_output(struct sk_buff *skb)
280{
281	struct net_device *dev = skb->dst->dev;
282
283	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
284
285	skb->dev = dev;
286	skb->protocol = htons(ETH_P_IP);
287
288	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
289			    ip_finish_output,
290			    !(IPCB(skb)->flags & IPSKB_REROUTED));
291}
292
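/* Transmit a packet on a connected socket: route it (unless the caller, e.g.
 * SCTP, already attached a route), build the IP header and hand the skb to
 * the LOCAL_OUT hook. A non-zero ipfragok permits fragmentation even when
 * the socket would normally set the DF bit.
 */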
293int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
294{
295	struct sock *sk = skb->sk;
296	struct inet_sock *inet = inet_sk(sk);
297	struct ip_options *opt = inet->opt;
298	struct rtable *rt;
299	struct iphdr *iph;
300
301	/* Skip all of this if the packet is already routed,
302	 * e.g. by something like SCTP.
303	 */
304	rt = (struct rtable *) skb->dst;
305	if (rt != NULL)
306		goto packet_routed;
307
308	/* Make sure we can route this packet. */
309	rt = (struct rtable *)__sk_dst_check(sk, 0);
310	if (rt == NULL) {
311		__be32 daddr;
312
313		/* Use correct destination address if we have options. */
314		daddr = inet->daddr;
315		if(opt && opt->srr)
316			daddr = opt->faddr;
317
318		{
319			struct flowi fl = { .oif = sk->sk_bound_dev_if,
320					    .nl_u = { .ip4_u =
321						      { .daddr = daddr,
322							.saddr = inet->saddr,
323							.tos = RT_CONN_FLAGS(sk) } },
324					    .proto = sk->sk_protocol,
325					    .uli_u = { .ports =
326						       { .sport = inet->sport,
327							 .dport = inet->dport } } };
328
329			/* If this fails, the retransmit mechanism of the transport
330			 * layer will keep trying until a route appears or the
331			 * connection times itself out.
332			 */
333			security_sk_classify_flow(sk, &fl);
334			if (ip_route_output_flow(&rt, &fl, sk, 0))
335				goto no_route;
336		}
337		sk_setup_caps(sk, &rt->u.dst);
338	}
339	skb->dst = dst_clone(&rt->u.dst);
340
341packet_routed:
342	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
343		goto no_route;
344
345	/* OK, we know where to send it, allocate and build IP header. */
346	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
347	skb_reset_network_header(skb);
348	iph = ip_hdr(skb);
349	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
350	iph->tot_len = htons(skb->len);
351	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
352		iph->frag_off = htons(IP_DF);
353	else
354		iph->frag_off = 0;
355	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
356	iph->protocol = sk->sk_protocol;
357	iph->saddr    = rt->rt_src;
358	iph->daddr    = rt->rt_dst;
359	/* Transport layer set skb->h.foo itself. */
360
361	if (opt && opt->optlen) {
362		iph->ihl += opt->optlen >> 2;
363		ip_options_build(skb, opt, inet->daddr, rt, 0);
364	}
365
366	ip_select_ident_more(iph, &rt->u.dst, sk,
367			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);
368
369	/* Add an IP checksum. */
370	ip_send_check(iph);
371
372	skb->priority = sk->sk_priority;
373
374	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
375		       dst_output);
376
377no_route:
378	IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
379	kfree_skb(skb);
380	return -EHOSTUNREACH;
381}
382
383
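/* Copy per-packet metadata (type, priority, device, route, netfilter and
 * related state) from the original skb to a freshly built fragment.
 */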
384static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
385{
386	to->pkt_type = from->pkt_type;
387	to->priority = from->priority;
388	to->protocol = from->protocol;
389	dst_release(to->dst);
390	to->dst = dst_clone(from->dst);
391	to->dev = from->dev;
392	to->mark = from->mark;
393
394	/* Copy the flags to each fragment. */
395	IPCB(to)->flags = IPCB(from)->flags;
396
397#ifdef CONFIG_NET_SCHED
398	to->tc_index = from->tc_index;
399#endif
400	nf_copy(to, from);
401#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
402    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
403	to->nf_trace = from->nf_trace;
404#endif
405#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
406	to->ipvs_property = from->ipvs_property;
407#endif
408	skb_copy_secmark(to, from);
409}
410
411/*
412 *	This IP datagram is too large to be sent in one piece.  Break it up into
413 *	smaller pieces (each consisting of an IP header plus a block of the
414 *	original datagram's data) so that each piece fits into a single device
415 *	frame, and queue such frames for sending.
416 */
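/*
 * For example (a hypothetical but typical case): with a 1500-byte MTU and a
 * 20-byte IP header, each non-final fragment carries 1480 bytes of payload
 * (already a multiple of 8), so the 13-bit fragment offset field advances by
 * 185 (= 1480 / 8) from one fragment to the next.
 */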
417
418int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
419{
420	struct iphdr *iph;
421	int raw = 0;
422	int ptr;
423	struct net_device *dev;
424	struct sk_buff *skb2;
425	unsigned int mtu, hlen, left, len, ll_rs, pad;
426	int offset;
427	__be16 not_last_frag;
428	struct rtable *rt = (struct rtable*)skb->dst;
429	int err = 0;
430
431	dev = rt->u.dst.dev;
432
433	/*
434	 *	Point into the IP datagram header.
435	 */
436
437	iph = ip_hdr(skb);
438
439	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
440		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
441		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
442			  htonl(ip_skb_dst_mtu(skb)));
443		kfree_skb(skb);
444		return -EMSGSIZE;
445	}
446
447	/*
448	 *	Setup starting values.
449	 */
450
451	hlen = iph->ihl * 4;
452	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
453	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
454
455	/* When a frag_list is given, use it. First, check its validity:
456	 * some transformers could create a wrong frag_list or break an existing
457	 * one; that is not prohibited. In such a case, fall back to copying.
458	 *
459	 * LATER: this step can be merged into the actual generation of fragments;
460	 * we could switch to copying when we see the first bad fragment.
461	 */
462	if (skb_shinfo(skb)->frag_list) {
463		struct sk_buff *frag;
464		int first_len = skb_pagelen(skb);
465
466		if (first_len - hlen > mtu ||
467		    ((first_len - hlen) & 7) ||
468		    (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
469		    skb_cloned(skb))
470			goto slow_path;
471
472		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
473			/* Correct geometry. */
474			if (frag->len > mtu ||
475			    ((frag->len & 7) && frag->next) ||
476			    skb_headroom(frag) < hlen)
477			    goto slow_path;
478
479			/* Partially cloned skb? */
480			if (skb_shared(frag))
481				goto slow_path;
482
483			BUG_ON(frag->sk);
484			if (skb->sk) {
485				sock_hold(skb->sk);
486				frag->sk = skb->sk;
487				frag->destructor = sock_wfree;
488				skb->truesize -= frag->truesize;
489			}
490		}
491
492		/* Everything is OK. Generate! */
493
494		err = 0;
495		offset = 0;
496		frag = skb_shinfo(skb)->frag_list;
497		skb_shinfo(skb)->frag_list = NULL;
498		skb->data_len = first_len - skb_headlen(skb);
499		skb->len = first_len;
500		iph->tot_len = htons(first_len);
501		iph->frag_off = htons(IP_MF);
502		ip_send_check(iph);
503
504		for (;;) {
505			/* Prepare the header of the next frame
506			 * before the previous one goes out. */
507			if (frag) {
508				frag->ip_summed = CHECKSUM_NONE;
509				skb_reset_transport_header(frag);
510				__skb_push(frag, hlen);
511				skb_reset_network_header(frag);
512				memcpy(skb_network_header(frag), iph, hlen);
513				iph = ip_hdr(frag);
514				iph->tot_len = htons(frag->len);
515				ip_copy_metadata(frag, skb);
516				if (offset == 0)
517					ip_options_fragment(frag);
518				offset += skb->len - hlen;
519				iph->frag_off = htons(offset>>3);
520				if (frag->next != NULL)
521					iph->frag_off |= htons(IP_MF);
522				/* Ready, complete checksum */
523				ip_send_check(iph);
524			}
525
526			err = output(skb);
527
528			if (!err)
529				IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
530			if (err || !frag)
531				break;
532
533			skb = frag;
534			frag = skb->next;
535			skb->next = NULL;
536		}
537
538		if (err == 0) {
539			IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
540			return 0;
541		}
542
543		while (frag) {
544			skb = frag->next;
545			kfree_skb(frag);
546			frag = skb;
547		}
548		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
549		return err;
550	}
551
552slow_path:
553	left = skb->len - hlen;		/* Space per frame */
554	ptr = raw + hlen;		/* Where to start from */
555
556	/* For bridged IP traffic encapsulated inside e.g. a VLAN header,
557	 * we need to make room for the encapsulating header.
558	 */
559	pad = nf_bridge_pad(skb);
560	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
561	mtu -= pad;
562
563	/*
564	 *	Fragment the datagram.
565	 */
566
567	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
568	not_last_frag = iph->frag_off & htons(IP_MF);
569
570	/*
571	 *	Keep copying data until we run out.
572	 */
573
574	while (left > 0) {
575		len = left;
576		/* IF: it doesn't fit, use 'mtu' - the data space left */
577		if (len > mtu)
578			len = mtu;
579		/* IF: we are not sending up to and including the packet end,
580		   then align the next start on an eight-byte boundary */
581		if (len < left)	{
582			len &= ~7;
583		}
584		/*
585		 *	Allocate buffer.
586		 */
587
588		if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
589			NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
590			err = -ENOMEM;
591			goto fail;
592		}
593
594		/*
595		 *	Set up data on packet
596		 */
597
598		ip_copy_metadata(skb2, skb);
599		skb_reserve(skb2, ll_rs);
600		skb_put(skb2, len + hlen);
601		skb_reset_network_header(skb2);
602		skb2->transport_header = skb2->network_header + hlen;
603
604		/*
605		 *	Charge the memory for the fragment to any owner
606		 *	it might possess
607		 */
608
609		if (skb->sk)
610			skb_set_owner_w(skb2, skb->sk);
611
612		/*
613		 *	Copy the packet header into the new buffer.
614		 */
615
616		skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
617
618		/*
619		 *	Copy a block of the IP datagram.
620		 */
621		if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
622			BUG();
623		left -= len;
624
625		/*
626		 *	Fill in the new header fields.
627		 */
628		iph = ip_hdr(skb2);
629		iph->frag_off = htons((offset >> 3));
630
631		/* ANK: dirty, but effective trick. Upgrade options only if
632		 * the segment to be fragmented was THE FIRST (otherwise,
633		 * options are already fixed) and make it ONCE
634		 * on the initial skb, so that all the following fragments
635		 * will inherit fixed options.
636		 */
637		if (offset == 0)
638			ip_options_fragment(skb);
639
640		/*
641		 *	Added AC : If we are fragmenting a fragment that's not the
642		 *		   last fragment, then keep the MF bit set on each fragment
643		 */
644		if (left > 0 || not_last_frag)
645			iph->frag_off |= htons(IP_MF);
646		ptr += len;
647		offset += len;
648
649		/*
650		 *	Put this fragment into the sending queue.
651		 */
652		iph->tot_len = htons(len + hlen);
653
654		ip_send_check(iph);
655
656		err = output(skb2);
657		if (err)
658			goto fail;
659
660		IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
661	}
662	kfree_skb(skb);
663	IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
664	return err;
665
666fail:
667	kfree_skb(skb);
668	IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
669	return err;
670}
671
672EXPORT_SYMBOL(ip_fragment);
673
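/* getfrag() callback for data coming from user-space iovecs: copy the bytes
 * into the skb and, unless the hardware will checksum the packet
 * (CHECKSUM_PARTIAL), fold the partial checksum into skb->csum.
 */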
674int
675ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
676{
677	struct iovec *iov = from;
678
679	if (skb->ip_summed == CHECKSUM_PARTIAL) {
680		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
681			return -EFAULT;
682	} else {
683		__wsum csum = 0;
684		if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
685			return -EFAULT;
686		skb->csum = csum_block_add(skb->csum, csum, odd);
687	}
688	return 0;
689}
690
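/* Checksum 'copy' bytes of a (possibly highmem) page starting at 'offset'. */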
691static inline __wsum
692csum_page(struct page *page, int offset, int copy)
693{
694	char *kaddr;
695	__wsum csum;
696	kaddr = kmap(page);
697	csum = csum_partial(kaddr + offset, copy, 0);
698	kunmap(page);
699	return csum;
700}
701
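/* Append data for a UFO-capable device: build one large skb holding the
 * whole UDP datagram in page fragments and set gso_size so the device can
 * slice it into MTU-sized IP fragments on transmit.
 */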
702static inline int ip_ufo_append_data(struct sock *sk,
703			int getfrag(void *from, char *to, int offset, int len,
704			       int odd, struct sk_buff *skb),
705			void *from, int length, int hh_len, int fragheaderlen,
706			int transhdrlen, int mtu,unsigned int flags)
707{
708	struct sk_buff *skb;
709	int err;
710
711	/* The network device supports UDP fragmentation offload (UFO),
712	 * so create one single skb containing the complete
713	 * UDP datagram.
714	 */
715	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
716		skb = sock_alloc_send_skb(sk,
717			hh_len + fragheaderlen + transhdrlen + 20,
718			(flags & MSG_DONTWAIT), &err);
719
720		if (skb == NULL)
721			return err;
722
723		/* reserve space for Hardware header */
724		skb_reserve(skb, hh_len);
725
726		/* create space for UDP/IP header */
727		skb_put(skb,fragheaderlen + transhdrlen);
728
729		/* initialize network header pointer */
730		skb_reset_network_header(skb);
731
732		/* initialize protocol header pointer */
733		skb->transport_header = skb->network_header + fragheaderlen;
734
735		skb->ip_summed = CHECKSUM_PARTIAL;
736		skb->csum = 0;
737		sk->sk_sndmsg_off = 0;
738	}
739
740	err = skb_append_datato_frags(sk,skb, getfrag, from,
741			       (length - transhdrlen));
742	if (!err) {
743		/* specify the length of each IP datagram fragment */
744		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
745		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
746		__skb_queue_tail(&sk->sk_write_queue, skb);
747
748		return 0;
749	}
750	/* There is not enough support to do UFO,
751	 * so follow the normal path.
752	 */
753	kfree_skb(skb);
754	return err;
755}
756
757/*
758 *	ip_append_data() and ip_append_page() can make one large IP datagram
759 *	from many pieces of data. Each piece will be held on the socket
760 *	until ip_push_pending_frames() is called. Each piece can be a page
761 *	or non-page data.
762 *
763 *	Besides UDP, other transport protocols - e.g. raw sockets - can
764 *	potentially use this interface as well.
765 *
766 *	LATER: length must be adjusted by pad at tail, when it is required.
767 */
768int ip_append_data(struct sock *sk,
769		   int getfrag(void *from, char *to, int offset, int len,
770			       int odd, struct sk_buff *skb),
771		   void *from, int length, int transhdrlen,
772		   struct ipcm_cookie *ipc, struct rtable *rt,
773		   unsigned int flags)
774{
775	struct inet_sock *inet = inet_sk(sk);
776	struct sk_buff *skb;
777
778	struct ip_options *opt = NULL;
779	int hh_len;
780	int exthdrlen;
781	int mtu;
782	int copy;
783	int err;
784	int offset = 0;
785	unsigned int maxfraglen, fragheaderlen;
786	int csummode = CHECKSUM_NONE;
787
788	if (flags&MSG_PROBE)
789		return 0;
790
791	if (skb_queue_empty(&sk->sk_write_queue)) {
792		/*
793		 * setup for corking.
794		 */
795		opt = ipc->opt;
796		if (opt) {
797			if (inet->cork.opt == NULL) {
798				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
799				if (unlikely(inet->cork.opt == NULL))
800					return -ENOBUFS;
801			}
802			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
803			inet->cork.flags |= IPCORK_OPT;
804			inet->cork.addr = ipc->addr;
805		}
806		dst_hold(&rt->u.dst);
807		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
808					    rt->u.dst.dev->mtu :
809					    dst_mtu(rt->u.dst.path);
810		inet->cork.rt = rt;
811		inet->cork.length = 0;
812		sk->sk_sndmsg_page = NULL;
813		sk->sk_sndmsg_off = 0;
814		if ((exthdrlen = rt->u.dst.header_len) != 0) {
815			length += exthdrlen;
816			transhdrlen += exthdrlen;
817		}
818	} else {
819		rt = inet->cork.rt;
820		if (inet->cork.flags & IPCORK_OPT)
821			opt = inet->cork.opt;
822
823		transhdrlen = 0;
824		exthdrlen = 0;
825		mtu = inet->cork.fragsize;
826	}
827	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
828
829	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
830	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
831
832	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
833		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
834		return -EMSGSIZE;
835	}
836
837	/*
838	 * transhdrlen > 0 means that this is the first fragment and we wish
839	 * that it will not be fragmented later.
840	 */
841	if (transhdrlen &&
842	    length + fragheaderlen <= mtu &&
843	    rt->u.dst.dev->features & NETIF_F_V4_CSUM &&
844	    !exthdrlen)
845		csummode = CHECKSUM_PARTIAL;
846
847	inet->cork.length += length;
848	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
849			(rt->u.dst.dev->features & NETIF_F_UFO)) {
850
851		err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
852					 fragheaderlen, transhdrlen, mtu,
853					 flags);
854		if (err)
855			goto error;
856		return 0;
857	}
858
859	/* So, what's going on in the loop below?
860	 *
861	 * We use the calculated fragment length to generate a chain of skbs;
862	 * each segment is an IP fragment, ready to be sent to the network
863	 * once the appropriate IP header has been added.
864	 */
865
866	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
867		goto alloc_new_skb;
868
869	while (length > 0) {
870		/* Check if the remaining data fits into current packet. */
871		copy = mtu - skb->len;
872		if (copy < length)
873			copy = maxfraglen - skb->len;
874		if (copy <= 0) {
875			char *data;
876			unsigned int datalen;
877			unsigned int fraglen;
878			unsigned int fraggap;
879			unsigned int alloclen;
880			struct sk_buff *skb_prev;
881alloc_new_skb:
882			skb_prev = skb;
883			if (skb_prev)
884				fraggap = skb_prev->len - maxfraglen;
885			else
886				fraggap = 0;
887
888			/*
889			 * If remaining data exceeds the mtu,
890			 * we know we need more fragment(s).
891			 */
892			datalen = length + fraggap;
893			if (datalen > mtu - fragheaderlen)
894				datalen = maxfraglen - fragheaderlen;
895			fraglen = datalen + fragheaderlen;
896
897			if ((flags & MSG_MORE) &&
898			    !(rt->u.dst.dev->features&NETIF_F_SG))
899				alloclen = mtu;
900			else
901				alloclen = datalen + fragheaderlen;
902
903			/* The last fragment gets additional space at tail.
904			 * Note that with MSG_MORE we overallocate on fragments,
905			 * because we have no idea which fragment will be
906			 * the last.
907			 */
908			if (datalen == length + fraggap)
909				alloclen += rt->u.dst.trailer_len;
910
911			if (transhdrlen) {
912				skb = sock_alloc_send_skb(sk,
913						alloclen + hh_len + 15,
914						(flags & MSG_DONTWAIT), &err);
915			} else {
916				skb = NULL;
917				if (atomic_read(&sk->sk_wmem_alloc) <=
918				    2 * sk->sk_sndbuf)
919					skb = sock_wmalloc(sk,
920							   alloclen + hh_len + 15, 1,
921							   sk->sk_allocation);
922				if (unlikely(skb == NULL))
923					err = -ENOBUFS;
924			}
925			if (skb == NULL)
926				goto error;
927
928			/*
929			 *	Fill in the control structures
930			 */
931			skb->ip_summed = csummode;
932			skb->csum = 0;
933			skb_reserve(skb, hh_len);
934
935			/*
936			 *	Find where to start putting bytes.
937			 */
938			data = skb_put(skb, fraglen);
939			skb_set_network_header(skb, exthdrlen);
940			skb->transport_header = (skb->network_header +
941						 fragheaderlen);
942			data += fragheaderlen;
943
944			if (fraggap) {
945				skb->csum = skb_copy_and_csum_bits(
946					skb_prev, maxfraglen,
947					data + transhdrlen, fraggap, 0);
948				skb_prev->csum = csum_sub(skb_prev->csum,
949							  skb->csum);
950				data += fraggap;
951				pskb_trim_unique(skb_prev, maxfraglen);
952			}
953
954			copy = datalen - transhdrlen - fraggap;
955			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
956				err = -EFAULT;
957				kfree_skb(skb);
958				goto error;
959			}
960
961			offset += copy;
962			length -= datalen - fraggap;
963			transhdrlen = 0;
964			exthdrlen = 0;
965			csummode = CHECKSUM_NONE;
966
967			/*
968			 * Put the packet on the pending queue.
969			 */
970			__skb_queue_tail(&sk->sk_write_queue, skb);
971			continue;
972		}
973
974		if (copy > length)
975			copy = length;
976
977		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
978			unsigned int off;
979
980			off = skb->len;
981			if (getfrag(from, skb_put(skb, copy),
982					offset, copy, off, skb) < 0) {
983				__skb_trim(skb, off);
984				err = -EFAULT;
985				goto error;
986			}
987		} else {
988			int i = skb_shinfo(skb)->nr_frags;
989			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
990			struct page *page = sk->sk_sndmsg_page;
991			int off = sk->sk_sndmsg_off;
992			unsigned int left;
993
994			if (page && (left = PAGE_SIZE - off) > 0) {
995				if (copy >= left)
996					copy = left;
997				if (page != frag->page) {
998					if (i == MAX_SKB_FRAGS) {
999						err = -EMSGSIZE;
1000						goto error;
1001					}
1002					get_page(page);
1003					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1004					frag = &skb_shinfo(skb)->frags[i];
1005				}
1006			} else if (i < MAX_SKB_FRAGS) {
1007				if (copy > PAGE_SIZE)
1008					copy = PAGE_SIZE;
1009				page = alloc_pages(sk->sk_allocation, 0);
1010				if (page == NULL)  {
1011					err = -ENOMEM;
1012					goto error;
1013				}
1014				sk->sk_sndmsg_page = page;
1015				sk->sk_sndmsg_off = 0;
1016
1017				skb_fill_page_desc(skb, i, page, 0, 0);
1018				frag = &skb_shinfo(skb)->frags[i];
1019				skb->truesize += PAGE_SIZE;
1020				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1021			} else {
1022				err = -EMSGSIZE;
1023				goto error;
1024			}
1025			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1026				err = -EFAULT;
1027				goto error;
1028			}
1029			sk->sk_sndmsg_off += copy;
1030			frag->size += copy;
1031			skb->len += copy;
1032			skb->data_len += copy;
1033		}
1034		offset += copy;
1035		length -= copy;
1036	}
1037
1038	return 0;
1039
1040error:
1041	inet->cork.length -= length;
1042	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1043	return err;
1044}
1045
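/* Like ip_append_data(), but appends data that already sits in a page (used
 * for sendpage()/sendfile() over UDP), so it requires a scatter-gather
 * capable device and avoids copying the payload.
 */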
1046ssize_t	ip_append_page(struct sock *sk, struct page *page,
1047		       int offset, size_t size, int flags)
1048{
1049	struct inet_sock *inet = inet_sk(sk);
1050	struct sk_buff *skb;
1051	struct rtable *rt;
1052	struct ip_options *opt = NULL;
1053	int hh_len;
1054	int mtu;
1055	int len;
1056	int err;
1057	unsigned int maxfraglen, fragheaderlen, fraggap;
1058
1059	if (inet->hdrincl)
1060		return -EPERM;
1061
1062	if (flags&MSG_PROBE)
1063		return 0;
1064
1065	if (skb_queue_empty(&sk->sk_write_queue))
1066		return -EINVAL;
1067
1068	rt = inet->cork.rt;
1069	if (inet->cork.flags & IPCORK_OPT)
1070		opt = inet->cork.opt;
1071
1072	if (!(rt->u.dst.dev->features&NETIF_F_SG))
1073		return -EOPNOTSUPP;
1074
1075	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1076	mtu = inet->cork.fragsize;
1077
1078	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
1079	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
1080
1081	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
1082		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
1083		return -EMSGSIZE;
1084	}
1085
1086	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1087		return -EINVAL;
1088
1089	inet->cork.length += size;
1090	if ((sk->sk_protocol == IPPROTO_UDP) &&
1091	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
1092		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
1093		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1094	}
1095
1096
1097	while (size > 0) {
1098		int i;
1099
1100		if (skb_is_gso(skb))
1101			len = size;
1102		else {
1103
1104			/* Check if the remaining data fits into current packet. */
1105			len = mtu - skb->len;
1106			if (len < size)
1107				len = maxfraglen - skb->len;
1108		}
1109		if (len <= 0) {
1110			struct sk_buff *skb_prev;
1111			int alloclen;
1112
1113			skb_prev = skb;
1114			fraggap = skb_prev->len - maxfraglen;
1115
1116			alloclen = fragheaderlen + hh_len + fraggap + 15;
1117			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
1118			if (unlikely(!skb)) {
1119				err = -ENOBUFS;
1120				goto error;
1121			}
1122
1123			/*
1124			 *	Fill in the control structures
1125			 */
1126			skb->ip_summed = CHECKSUM_NONE;
1127			skb->csum = 0;
1128			skb_reserve(skb, hh_len);
1129
1130			/*
1131			 *	Find where to start putting bytes.
1132			 */
1133			skb_put(skb, fragheaderlen + fraggap);
1134			skb_reset_network_header(skb);
1135			skb->transport_header = (skb->network_header +
1136						 fragheaderlen);
1137			if (fraggap) {
1138				skb->csum = skb_copy_and_csum_bits(skb_prev,
1139								   maxfraglen,
1140						    skb_transport_header(skb),
1141								   fraggap, 0);
1142				skb_prev->csum = csum_sub(skb_prev->csum,
1143							  skb->csum);
1144				pskb_trim_unique(skb_prev, maxfraglen);
1145			}
1146
1147			/*
1148			 * Put the packet on the pending queue.
1149			 */
1150			__skb_queue_tail(&sk->sk_write_queue, skb);
1151			continue;
1152		}
1153
1154		i = skb_shinfo(skb)->nr_frags;
1155		if (len > size)
1156			len = size;
1157		if (skb_can_coalesce(skb, i, page, offset)) {
1158			skb_shinfo(skb)->frags[i-1].size += len;
1159		} else if (i < MAX_SKB_FRAGS) {
1160			get_page(page);
1161			skb_fill_page_desc(skb, i, page, offset, len);
1162		} else {
1163			err = -EMSGSIZE;
1164			goto error;
1165		}
1166
1167		if (skb->ip_summed == CHECKSUM_NONE) {
1168			__wsum csum;
1169			csum = csum_page(page, offset, len);
1170			skb->csum = csum_block_add(skb->csum, csum, skb->len);
1171		}
1172
1173		skb->len += len;
1174		skb->data_len += len;
1175		offset += len;
1176		size -= len;
1177	}
1178	return 0;
1179
1180error:
1181	inet->cork.length -= size;
1182	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1183	return err;
1184}
1185
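/* Release the per-socket cork state: free the copied IP options and drop the
 * reference on the cached route.
 */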
1186static void ip_cork_release(struct inet_sock *inet)
1187{
1188	inet->cork.flags &= ~IPCORK_OPT;
1189	kfree(inet->cork.opt);
1190	inet->cork.opt = NULL;
1191	if (inet->cork.rt) {
1192		ip_rt_put(inet->cork.rt);
1193		inet->cork.rt = NULL;
1194	}
1195}
1196
1197/*
1198 *	Combine all pending IP fragments on the socket into one IP datagram
1199 *	and push them out.
1200 */
1201int ip_push_pending_frames(struct sock *sk)
1202{
1203	struct sk_buff *skb, *tmp_skb;
1204	struct sk_buff **tail_skb;
1205	struct inet_sock *inet = inet_sk(sk);
1206	struct ip_options *opt = NULL;
1207	struct rtable *rt = inet->cork.rt;
1208	struct iphdr *iph;
1209	__be16 df = 0;
1210	__u8 ttl;
1211	int err = 0;
1212
1213	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1214		goto out;
1215	tail_skb = &(skb_shinfo(skb)->frag_list);
1216
1217	/* move skb->data to point at the IP header, past any ext header */
1218	if (skb->data < skb_network_header(skb))
1219		__skb_pull(skb, skb_network_offset(skb));
1220	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1221		__skb_pull(tmp_skb, skb_network_header_len(skb));
1222		*tail_skb = tmp_skb;
1223		tail_skb = &(tmp_skb->next);
1224		skb->len += tmp_skb->len;
1225		skb->data_len += tmp_skb->len;
1226		skb->truesize += tmp_skb->truesize;
1227		__sock_put(tmp_skb->sk);
1228		tmp_skb->destructor = NULL;
1229		tmp_skb->sk = NULL;
1230	}
1231
1232	/* Unless the user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow
1233	 * the frame generated here to be fragmented. No matter how transforms
1234	 * change the size of the packet, it will still go out.
1235	 */
1236	if (inet->pmtudisc < IP_PMTUDISC_DO)
1237		skb->local_df = 1;
1238
1239	/* DF bit is set when we want to see DF on outgoing frames.
1240	 * If local_df is set too, we still allow this frame to be fragmented
1241	 * locally. */
1242	if (inet->pmtudisc >= IP_PMTUDISC_DO ||
1243	    (skb->len <= dst_mtu(&rt->u.dst) &&
1244	     ip_dont_fragment(sk, &rt->u.dst)))
1245		df = htons(IP_DF);
1246
1247	if (inet->cork.flags & IPCORK_OPT)
1248		opt = inet->cork.opt;
1249
1250	if (rt->rt_type == RTN_MULTICAST)
1251		ttl = inet->mc_ttl;
1252	else
1253		ttl = ip_select_ttl(inet, &rt->u.dst);
1254
1255	iph = (struct iphdr *)skb->data;
1256	iph->version = 4;
1257	iph->ihl = 5;
1258	if (opt) {
1259		iph->ihl += opt->optlen>>2;
1260		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
1261	}
1262	iph->tos = inet->tos;
1263	iph->tot_len = htons(skb->len);
1264	iph->frag_off = df;
1265	ip_select_ident(iph, &rt->u.dst, sk);
1266	iph->ttl = ttl;
1267	iph->protocol = sk->sk_protocol;
1268	iph->saddr = rt->rt_src;
1269	iph->daddr = rt->rt_dst;
1270	ip_send_check(iph);
1271
1272	skb->priority = sk->sk_priority;
1273	skb->dst = dst_clone(&rt->u.dst);
1274
1275	if (iph->protocol == IPPROTO_ICMP)
1276		icmp_out_count(((struct icmphdr *)
1277			skb_transport_header(skb))->type);
1278
1279	/* Netfilter gets the whole, not yet fragmented, skb. */
1280	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
1281		      skb->dst->dev, dst_output);
1282	if (err) {
1283		if (err > 0)
1284			err = inet->recverr ? net_xmit_errno(err) : 0;
1285		if (err)
1286			goto error;
1287	}
1288
1289out:
1290	ip_cork_release(inet);
1291	return err;
1292
1293error:
1294	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1295	goto out;
1296}
1297
1298/*
1299 *	Throw away all pending data on the socket.
1300 */
1301void ip_flush_pending_frames(struct sock *sk)
1302{
1303	struct sk_buff *skb;
1304
1305	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
1306		kfree_skb(skb);
1307
1308	ip_cork_release(inet_sk(sk));
1309}
1310
1311
1312/*
1313 *	Fetch data from kernel space and fill in checksum if needed.
1314 */
1315static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1316			      int len, int odd, struct sk_buff *skb)
1317{
1318	__wsum csum;
1319
1320	csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
1321	skb->csum = csum_block_add(skb->csum, csum, odd);
1322	return 0;
1323}
1324
1325/*
1326 *	Generic function to send a packet as a reply to another packet.
1327 *	So far it is used to send TCP resets; ICMP should use this function too.
1328 *
1329 *	Should run single-threaded per socket because it uses the sock
1330 *	structure to pass arguments.
1331 *
1332 *	LATER: switch from ip_build_xmit to ip_append_*
1333 */
1334void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
1335		   unsigned int len)
1336{
1337	struct inet_sock *inet = inet_sk(sk);
1338	struct {
1339		struct ip_options	opt;
1340		char			data[40];
1341	} replyopts;
1342	struct ipcm_cookie ipc;
1343	__be32 daddr;
1344	struct rtable *rt = (struct rtable*)skb->dst;
1345
1346	if (ip_options_echo(&replyopts.opt, skb))
1347		return;
1348
1349	daddr = ipc.addr = rt->rt_src;
1350	ipc.opt = NULL;
1351
1352	if (replyopts.opt.optlen) {
1353		ipc.opt = &replyopts.opt;
1354
1355		if (ipc.opt->srr)
1356			daddr = replyopts.opt.faddr;
1357	}
1358
1359	{
1360		struct flowi fl = { .oif = arg->bound_dev_if,
1361				    .nl_u = { .ip4_u =
1362					      { .daddr = daddr,
1363						.saddr = rt->rt_spec_dst,
1364						.tos = RT_TOS(ip_hdr(skb)->tos) } },
1365				    /* Not quite clean, but right. */
1366				    .uli_u = { .ports =
1367					       { .sport = tcp_hdr(skb)->dest,
1368						 .dport = tcp_hdr(skb)->source } },
1369				    .proto = sk->sk_protocol };
1370		security_skb_classify_flow(skb, &fl);
1371		if (ip_route_output_key(&rt, &fl))
1372			return;
1373	}
1374
1375	/* And let IP do all the hard work.
1376
1377	   This chunk is not reentrant, hence the spinlock.
1378	   Note that it relies on the fact that this function is called
1379	   with BH disabled locally and that sk cannot already be locked.
1380	 */
1381	bh_lock_sock(sk);
1382	inet->tos = ip_hdr(skb)->tos;
1383	sk->sk_priority = skb->priority;
1384	sk->sk_protocol = ip_hdr(skb)->protocol;
1385	sk->sk_bound_dev_if = arg->bound_dev_if;
1386	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1387		       &ipc, rt, MSG_DONTWAIT);
1388	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1389		if (arg->csumoffset >= 0)
1390			*((__sum16 *)skb_transport_header(skb) +
1391			  arg->csumoffset) = csum_fold(csum_add(skb->csum,
1392								arg->csum));
1393		skb->ip_summed = CHECKSUM_NONE;
1394		ip_push_pending_frames(sk);
1395	}
1396
1397	bh_unlock_sock(sk);
1398
1399	ip_rt_put(rt);
1400}
1401
1402void __init ip_init(void)
1403{
1404	ip_rt_init();
1405	inet_initpeers();
1406
1407#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
1408	igmp_mc_proc_init();
1409#endif
1410}
1411
1412EXPORT_SYMBOL(ip_generic_getfrag);
1413EXPORT_SYMBOL(ip_queue_xmit);
1414EXPORT_SYMBOL(ip_send_check);
1415