ip_output.c revision bbe735e4247dba32568a305553b010081c8dea99
1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		The Internet Protocol (IP) output module.
7 *
8 * Version:	$Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $
9 *
10 * Authors:	Ross Biro
11 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *		Donald Becker, <becker@super.org>
13 *		Alan Cox, <Alan.Cox@linux.org>
14 *		Richard Underwood
15 *		Stefan Becker, <stefanb@yello.ping.de>
16 *		Jorge Cwik, <jorge@laser.satlink.net>
17 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
18 *		Hirokazu Takahashi, <taka@valinux.co.jp>
19 *
20 *	See ip_input.c for original log
21 *
22 *	Fixes:
23 *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
24 *		Mike Kilburn	:	htons() missing in ip_build_xmit.
25 *		Bradford Johnson:	Fix faulty handling of some frames when
26 *					no route is found.
27 *		Alexander Demenshin:	Missing sk/skb free in ip_queue_xmit
28 *					(in case the packet is not accepted by
29 *					output firewall rules)
30 *		Mike McLagan	:	Routing by source
31 *		Alexey Kuznetsov:	use new route cache
32 *		Andi Kleen:		Fix broken PMTU recovery and remove
33 *					some redundant tests.
34 *	Vitaly E. Lavrov	:	Transparent proxy revived after a year-long coma.
35 *		Andi Kleen	: 	Replace ip_reply with ip_send_reply.
36 *		Andi Kleen	:	Split fast and slow ip_build_xmit path
37 *					for decreased register pressure on x86
38 *					and more readability.
39 *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
40 *					silently drop skb instead of failing with -EPERM.
41 *		Detlev Wengorz	:	Copy protocol for fragments.
42 *		Hirokazu Takahashi:	HW checksumming for outgoing UDP
43 *					datagrams.
44 *		Hirokazu Takahashi:	sendfile() on UDP works now.
45 */
46
47#include <asm/uaccess.h>
48#include <asm/system.h>
49#include <linux/module.h>
50#include <linux/types.h>
51#include <linux/kernel.h>
52#include <linux/mm.h>
53#include <linux/string.h>
54#include <linux/errno.h>
55#include <linux/highmem.h>
56
57#include <linux/socket.h>
58#include <linux/sockios.h>
59#include <linux/in.h>
60#include <linux/inet.h>
61#include <linux/netdevice.h>
62#include <linux/etherdevice.h>
63#include <linux/proc_fs.h>
64#include <linux/stat.h>
65#include <linux/init.h>
66
67#include <net/snmp.h>
68#include <net/ip.h>
69#include <net/protocol.h>
70#include <net/route.h>
71#include <net/xfrm.h>
72#include <linux/skbuff.h>
73#include <net/sock.h>
74#include <net/arp.h>
75#include <net/icmp.h>
76#include <net/checksum.h>
77#include <net/inetpeer.h>
78#include <net/checksum.h>
79#include <linux/igmp.h>
80#include <linux/netfilter_ipv4.h>
81#include <linux/netfilter_bridge.h>
82#include <linux/mroute.h>
83#include <linux/netlink.h>
84#include <linux/tcp.h>
85
86int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
87
88/* Generate a checksum for an outgoing IP datagram. */
89__inline__ void ip_send_check(struct iphdr *iph)
90{
91	iph->check = 0;
92	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
93}
94
95/* dev_loopback_xmit for use with netfilter. */
96static int ip_dev_loopback_xmit(struct sk_buff *newskb)
97{
98	skb_reset_mac_header(newskb);
99	__skb_pull(newskb, skb_network_offset(newskb));
100	newskb->pkt_type = PACKET_LOOPBACK;
101	newskb->ip_summed = CHECKSUM_UNNECESSARY;
102	BUG_TRAP(newskb->dst);
103	netif_rx(newskb);
104	return 0;
105}
106
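/* Pick the TTL for a unicast packet: a per-socket uc_ttl below zero means
 * "not set", in which case the route's hop-limit metric (which defaults to
 * sysctl_ip_default_ttl) is used instead.
 */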
107static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
108{
109	int ttl = inet->uc_ttl;
110
111	if (ttl < 0)
112		ttl = dst_metric(dst, RTAX_HOPLIMIT);
113	return ttl;
114}
115
116/*
117 *		Add an ip header to a skbuff and send it out.
118 *
119 */
120int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
121			  __be32 saddr, __be32 daddr, struct ip_options *opt)
122{
123	struct inet_sock *inet = inet_sk(sk);
124	struct rtable *rt = (struct rtable *)skb->dst;
125	struct iphdr *iph;
126
127	/* Build the IP header. */
128	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
129	skb_reset_network_header(skb);
130	iph = skb->nh.iph;
131	iph->version  = 4;
132	iph->ihl      = 5;
133	iph->tos      = inet->tos;
134	if (ip_dont_fragment(sk, &rt->u.dst))
135		iph->frag_off = htons(IP_DF);
136	else
137		iph->frag_off = 0;
138	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
139	iph->daddr    = rt->rt_dst;
140	iph->saddr    = rt->rt_src;
141	iph->protocol = sk->sk_protocol;
142	iph->tot_len  = htons(skb->len);
143	ip_select_ident(iph, &rt->u.dst, sk);
144
145	if (opt && opt->optlen) {
146		iph->ihl += opt->optlen>>2;
147		ip_options_build(skb, opt, daddr, rt, 0);
148	}
149	ip_send_check(iph);
150
151	skb->priority = sk->sk_priority;
152
153	/* Send it out. */
154	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
155		       dst_output);
156}
157
158EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
159
160static inline int ip_finish_output2(struct sk_buff *skb)
161{
162	struct dst_entry *dst = skb->dst;
163	struct net_device *dev = dst->dev;
164	int hh_len = LL_RESERVED_SPACE(dev);
165
166	/* Be paranoid, rather than too clever. */
167	if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
168		struct sk_buff *skb2;
169
170		skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
171		if (skb2 == NULL) {
172			kfree_skb(skb);
173			return -ENOMEM;
174		}
175		if (skb->sk)
176			skb_set_owner_w(skb2, skb->sk);
177		kfree_skb(skb);
178		skb = skb2;
179	}
180
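	/* Use the cached hardware header if the destination has one;
	 * otherwise let the neighbour output function (e.g. ARP resolution)
	 * build it before the packet is queued on the device.
	 */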
181	if (dst->hh)
182		return neigh_hh_output(dst->hh, skb);
183	else if (dst->neighbour)
184		return dst->neighbour->output(skb);
185
186	if (net_ratelimit())
187		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
188	kfree_skb(skb);
189	return -EINVAL;
190}
191
192static inline int ip_finish_output(struct sk_buff *skb)
193{
194#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
195	/* Policy lookup after SNAT yielded a new policy */
196	if (skb->dst->xfrm != NULL) {
197		IPCB(skb)->flags |= IPSKB_REROUTED;
198		return dst_output(skb);
199	}
200#endif
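	/* Fragment oversized packets here, unless GSO will segment the skb
	 * further down the stack anyway.
	 */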
201	if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
202		return ip_fragment(skb, ip_finish_output2);
203	else
204		return ip_finish_output2(skb);
205}
206
207int ip_mc_output(struct sk_buff *skb)
208{
209	struct sock *sk = skb->sk;
210	struct rtable *rt = (struct rtable*)skb->dst;
211	struct net_device *dev = rt->u.dst.dev;
212
213	/*
214	 *	If the indicated interface is up and running, send the packet.
215	 */
216	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
217
218	skb->dev = dev;
219	skb->protocol = htons(ETH_P_IP);
220
221	/*
222	 *	Multicasts are looped back for other local users
223	 */
224
225	if (rt->rt_flags&RTCF_MULTICAST) {
226		if ((!sk || inet_sk(sk)->mc_loop)
227#ifdef CONFIG_IP_MROUTE
228		/* Small optimization: do not loop back non-local frames
229		   that were returned after forwarding; ip_mr_input will
230		   drop them in any case.
231		   Note that local frames are looped back so that they are
232		   delivered to local recipients.
233
234		   This check is duplicated in ip_mr_input at the moment.
235		 */
236		    && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
237#endif
238		) {
239			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
240			if (newskb)
241				NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
242					newskb->dev,
243					ip_dev_loopback_xmit);
244		}
245
246		/* Multicasts with ttl 0 must not go beyond the host */
247
248		if (skb->nh.iph->ttl == 0) {
249			kfree_skb(skb);
250			return 0;
251		}
252	}
253
254	if (rt->rt_flags&RTCF_BROADCAST) {
255		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
256		if (newskb)
257			NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
258				newskb->dev, ip_dev_loopback_xmit);
259	}
260
261	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
262			    ip_finish_output,
263			    !(IPCB(skb)->flags & IPSKB_REROUTED));
264}
265
266int ip_output(struct sk_buff *skb)
267{
268	struct net_device *dev = skb->dst->dev;
269
270	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
271
272	skb->dev = dev;
273	skb->protocol = htons(ETH_P_IP);
274
275	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
276			    ip_finish_output,
277			    !(IPCB(skb)->flags & IPSKB_REROUTED));
278}
279
280int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
281{
282	struct sock *sk = skb->sk;
283	struct inet_sock *inet = inet_sk(sk);
284	struct ip_options *opt = inet->opt;
285	struct rtable *rt;
286	struct iphdr *iph;
287
288	/* Skip all of this if the packet is already routed,
289	 * e.g. by something like SCTP.
290	 */
291	rt = (struct rtable *) skb->dst;
292	if (rt != NULL)
293		goto packet_routed;
294
295	/* Make sure we can route this packet. */
296	rt = (struct rtable *)__sk_dst_check(sk, 0);
297	if (rt == NULL) {
298		__be32 daddr;
299
300		/* Use correct destination address if we have options. */
301		daddr = inet->daddr;
302		if(opt && opt->srr)
303			daddr = opt->faddr;
304
305		{
306			struct flowi fl = { .oif = sk->sk_bound_dev_if,
307					    .nl_u = { .ip4_u =
308						      { .daddr = daddr,
309							.saddr = inet->saddr,
310							.tos = RT_CONN_FLAGS(sk) } },
311					    .proto = sk->sk_protocol,
312					    .uli_u = { .ports =
313						       { .sport = inet->sport,
314							 .dport = inet->dport } } };
315
316			/* If this fails, the transport layer's retransmit
317			 * mechanism will keep trying until a route appears or
318			 * the connection times itself out.
319			 */
320			security_sk_classify_flow(sk, &fl);
321			if (ip_route_output_flow(&rt, &fl, sk, 0))
322				goto no_route;
323		}
324		sk_setup_caps(sk, &rt->u.dst);
325	}
326	skb->dst = dst_clone(&rt->u.dst);
327
328packet_routed:
329	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
330		goto no_route;
331
332	/* OK, we know where to send it, allocate and build IP header. */
333	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
334	skb_reset_network_header(skb);
335	iph = skb->nh.iph;
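	/* Write version (4), header length (5 words, options are accounted
	 * for below) and TOS with a single 16-bit store.
	 */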
336	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
337	iph->tot_len = htons(skb->len);
338	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
339		iph->frag_off = htons(IP_DF);
340	else
341		iph->frag_off = 0;
342	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
343	iph->protocol = sk->sk_protocol;
344	iph->saddr    = rt->rt_src;
345	iph->daddr    = rt->rt_dst;
346	/* The transport layer has already set skb->h.foo itself. */
347
348	if (opt && opt->optlen) {
349		iph->ihl += opt->optlen >> 2;
350		ip_options_build(skb, opt, inet->daddr, rt, 0);
351	}
352
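	/* A GSO skb is split into gso_segs packets by the device, and each
	 * of them needs its own IP ID, so reserve that many extra values.
	 */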
353	ip_select_ident_more(iph, &rt->u.dst, sk,
354			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);
355
356	/* Add an IP checksum. */
357	ip_send_check(iph);
358
359	skb->priority = sk->sk_priority;
360
361	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
362		       dst_output);
363
364no_route:
365	IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
366	kfree_skb(skb);
367	return -EHOSTUNREACH;
368}
369
370
371static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
372{
373	to->pkt_type = from->pkt_type;
374	to->priority = from->priority;
375	to->protocol = from->protocol;
376	dst_release(to->dst);
377	to->dst = dst_clone(from->dst);
378	to->dev = from->dev;
379	to->mark = from->mark;
380
381	/* Copy the flags to each fragment. */
382	IPCB(to)->flags = IPCB(from)->flags;
383
384#ifdef CONFIG_NET_SCHED
385	to->tc_index = from->tc_index;
386#endif
387#ifdef CONFIG_NETFILTER
388	/* Connection association is same as pre-frag packet */
389	nf_conntrack_put(to->nfct);
390	to->nfct = from->nfct;
391	nf_conntrack_get(to->nfct);
392	to->nfctinfo = from->nfctinfo;
393#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
394	to->ipvs_property = from->ipvs_property;
395#endif
396#ifdef CONFIG_BRIDGE_NETFILTER
397	nf_bridge_put(to->nf_bridge);
398	to->nf_bridge = from->nf_bridge;
399	nf_bridge_get(to->nf_bridge);
400#endif
401#endif
402	skb_copy_secmark(to, from);
403}
404
405/*
406 *	This IP datagram is too large to be sent in one piece.  Break it up into
407 *	smaller pieces (each consisting of an IP header plus a block of the
408 *	data of the original IP data part) that will still fit in a
409 *	single device frame, and queue such frames for sending.
410 */
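/*
 *	For example (illustrative figures only): with a 1500-byte MTU and a
 *	20-byte IP header, each fragment can carry at most 1480 bytes of
 *	payload, and every fragment except the last must carry a multiple of
 *	8 bytes, since the fragment offset field counts 8-byte units.
 */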
411
412int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
413{
414	struct iphdr *iph;
415	int raw = 0;
416	int ptr;
417	struct net_device *dev;
418	struct sk_buff *skb2;
419	unsigned int mtu, hlen, left, len, ll_rs, pad;
420	int offset;
421	__be16 not_last_frag;
422	struct rtable *rt = (struct rtable*)skb->dst;
423	int err = 0;
424
425	dev = rt->u.dst.dev;
426
427	/*
428	 *	Point into the IP datagram header.
429	 */
430
431	iph = skb->nh.iph;
432
433	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
434		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
435		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
436			  htonl(dst_mtu(&rt->u.dst)));
437		kfree_skb(skb);
438		return -EMSGSIZE;
439	}
440
441	/*
442	 *	Setup starting values.
443	 */
444
445	hlen = iph->ihl * 4;
446	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
447	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
448
449	/* When a frag_list is given, use it. First, check its validity:
450	 * some transformers could create a wrong frag_list or break an existing
451	 * one; that is not prohibited. In that case fall back to copying.
452	 *
453	 * LATER: this step can be merged into the real generation of fragments;
454	 * we can switch to copying when we see the first bad fragment.
455	 */
456	if (skb_shinfo(skb)->frag_list) {
457		struct sk_buff *frag;
458		int first_len = skb_pagelen(skb);
459
460		if (first_len - hlen > mtu ||
461		    ((first_len - hlen) & 7) ||
462		    (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
463		    skb_cloned(skb))
464			goto slow_path;
465
466		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
467			/* Correct geometry. */
468			if (frag->len > mtu ||
469			    ((frag->len & 7) && frag->next) ||
470			    skb_headroom(frag) < hlen)
471			    goto slow_path;
472
473			/* Partially cloned skb? */
474			if (skb_shared(frag))
475				goto slow_path;
476
477			BUG_ON(frag->sk);
478			if (skb->sk) {
479				sock_hold(skb->sk);
480				frag->sk = skb->sk;
481				frag->destructor = sock_wfree;
482				skb->truesize -= frag->truesize;
483			}
484		}
485
486		/* Everything is OK. Generate! */
487
488		err = 0;
489		offset = 0;
490		frag = skb_shinfo(skb)->frag_list;
491		skb_shinfo(skb)->frag_list = NULL;
492		skb->data_len = first_len - skb_headlen(skb);
493		skb->len = first_len;
494		iph->tot_len = htons(first_len);
495		iph->frag_off = htons(IP_MF);
496		ip_send_check(iph);
497
498		for (;;) {
499			/* Prepare the header of the next frame
500			 * before the previous one has gone down. */
501			if (frag) {
502				frag->ip_summed = CHECKSUM_NONE;
503				frag->h.raw = frag->data;
504				__skb_push(frag, hlen);
505				skb_reset_network_header(frag);
506				memcpy(frag->nh.raw, iph, hlen);
507				iph = frag->nh.iph;
508				iph->tot_len = htons(frag->len);
509				ip_copy_metadata(frag, skb);
510				if (offset == 0)
511					ip_options_fragment(frag);
512				offset += skb->len - hlen;
513				iph->frag_off = htons(offset>>3);
514				if (frag->next != NULL)
515					iph->frag_off |= htons(IP_MF);
516				/* Ready, complete checksum */
517				ip_send_check(iph);
518			}
519
520			err = output(skb);
521
522			if (!err)
523				IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
524			if (err || !frag)
525				break;
526
527			skb = frag;
528			frag = skb->next;
529			skb->next = NULL;
530		}
531
532		if (err == 0) {
533			IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
534			return 0;
535		}
536
537		while (frag) {
538			skb = frag->next;
539			kfree_skb(frag);
540			frag = skb;
541		}
542		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
543		return err;
544	}
545
546slow_path:
547	left = skb->len - hlen;		/* Space per frame */
548	ptr = raw + hlen;		/* Where to start from */
549
550	/* for bridged IP traffic encapsulated inside e.g. a vlan header,
551	 * we need to make room for the encapsulating header
552	 */
553	pad = nf_bridge_pad(skb);
554	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
555	mtu -= pad;
556
557	/*
558	 *	Fragment the datagram.
559	 */
560
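	/* The fragment offset field counts 8-byte units; IP_OFFSET masks off
	 * the flag bits.
	 */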
561	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
562	not_last_frag = iph->frag_off & htons(IP_MF);
563
564	/*
565	 *	Keep copying data until we run out.
566	 */
567
568	while (left > 0) {
569		len = left;
570		/* IF: it doesn't fit, use 'mtu' - the data space left */
571		if (len > mtu)
572			len = mtu;
573		/* IF: we are not sending up to and including the packet end
574		   then align the next start on an eight-byte boundary */
575		if (len < left)	{
576			len &= ~7;
577		}
578		/*
579		 *	Allocate buffer.
580		 */
581
582		if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
583			NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
584			err = -ENOMEM;
585			goto fail;
586		}
587
588		/*
589		 *	Set up data on packet
590		 */
591
592		ip_copy_metadata(skb2, skb);
593		skb_reserve(skb2, ll_rs);
594		skb_put(skb2, len + hlen);
595		skb_reset_network_header(skb2);
596		skb2->h.raw = skb2->data + hlen;
597
598		/*
599		 *	Charge the memory for the fragment to any owner
600		 *	it might possess
601		 */
602
603		if (skb->sk)
604			skb_set_owner_w(skb2, skb->sk);
605
606		/*
607		 *	Copy the packet header into the new buffer.
608		 */
609
610		memcpy(skb2->nh.raw, skb->data, hlen);
611
612		/*
613		 *	Copy a block of the IP datagram.
614		 */
615		if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
616			BUG();
617		left -= len;
618
619		/*
620		 *	Fill in the new header fields.
621		 */
622		iph = skb2->nh.iph;
623		iph->frag_off = htons((offset >> 3));
624
625		/* ANK: dirty, but effective trick. Upgrade options only if
626		 * the segment to be fragmented was THE FIRST (otherwise,
627		 * options are already fixed) and make it ONCE
628		 * on the initial skb, so that all the following fragments
629		 * will inherit fixed options.
630		 */
631		if (offset == 0)
632			ip_options_fragment(skb);
633
634		/*
635		 *	Added AC : If we are fragmenting a fragment that's not the
636		 *		   last fragment then keep MF on each bit
637		 */
638		if (left > 0 || not_last_frag)
639			iph->frag_off |= htons(IP_MF);
640		ptr += len;
641		offset += len;
642
643		/*
644		 *	Put this fragment into the sending queue.
645		 */
646		iph->tot_len = htons(len + hlen);
647
648		ip_send_check(iph);
649
650		err = output(skb2);
651		if (err)
652			goto fail;
653
654		IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
655	}
656	kfree_skb(skb);
657	IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
658	return err;
659
660fail:
661	kfree_skb(skb);
662	IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
663	return err;
664}
665
666EXPORT_SYMBOL(ip_fragment);
667
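/*
 *	Copy data from a user iovec into an skb, accumulating a checksum in
 *	skb->csum on the way unless the hardware will checksum the packet
 *	(CHECKSUM_PARTIAL).
 */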
668int
669ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
670{
671	struct iovec *iov = from;
672
673	if (skb->ip_summed == CHECKSUM_PARTIAL) {
674		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
675			return -EFAULT;
676	} else {
677		__wsum csum = 0;
678		if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
679			return -EFAULT;
680		skb->csum = csum_block_add(skb->csum, csum, odd);
681	}
682	return 0;
683}
684
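/* Checksum 'copy' bytes of a page starting at 'offset', mapping the page
 * first so that highmem pages work too.
 */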
685static inline __wsum
686csum_page(struct page *page, int offset, int copy)
687{
688	char *kaddr;
689	__wsum csum;
690	kaddr = kmap(page);
691	csum = csum_partial(kaddr + offset, copy, 0);
692	kunmap(page);
693	return csum;
694}
695
696static inline int ip_ufo_append_data(struct sock *sk,
697			int getfrag(void *from, char *to, int offset, int len,
698			       int odd, struct sk_buff *skb),
699			void *from, int length, int hh_len, int fragheaderlen,
700			int transhdrlen, int mtu,unsigned int flags)
701{
702	struct sk_buff *skb;
703	int err;
704
705	/* The network device supports UDP fragmentation offload, so
706	 * create one single skb packet containing the complete
707	 * UDP datagram.
708	 */
709	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
710		skb = sock_alloc_send_skb(sk,
711			hh_len + fragheaderlen + transhdrlen + 20,
712			(flags & MSG_DONTWAIT), &err);
713
714		if (skb == NULL)
715			return err;
716
717		/* reserve space for Hardware header */
718		skb_reserve(skb, hh_len);
719
720		/* create space for UDP/IP header */
721		skb_put(skb,fragheaderlen + transhdrlen);
722
723		/* initialize network header pointer */
724		skb_reset_network_header(skb);
725
726		/* initialize protocol header pointer */
727		skb->h.raw = skb->data + fragheaderlen;
728
729		skb->ip_summed = CHECKSUM_PARTIAL;
730		skb->csum = 0;
731		sk->sk_sndmsg_off = 0;
732	}
733
734	err = skb_append_datato_frags(sk,skb, getfrag, from,
735			       (length - transhdrlen));
736	if (!err) {
737		/* specify the length of each IP datagram fragment */
738		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
739		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
740		__skb_queue_tail(&sk->sk_write_queue, skb);
741
742		return 0;
743	}
744	/* There is not enough support to do UFO,
745	 * so follow the normal path.
746	 */
747	kfree_skb(skb);
748	return err;
749}
750
751/*
752 *	ip_append_data() and ip_append_page() can make one large IP datagram
753 *	from many pieces of data. Each piece will be held on the socket
754 *	until ip_push_pending_frames() is called. Each piece can be a page
755 *	or non-page data.
756 *
757 *	Not only UDP; other transport protocols - e.g. raw sockets - can
758 *	potentially use this interface as well.
759 *
760 *	LATER: length must be adjusted by pad at tail, when it is required.
761 */
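/*
 *	Typical usage (sketch): a datagram protocol such as UDP calls
 *	ip_append_data() one or more times to queue data on the socket, then
 *	either ip_push_pending_frames() to build and transmit the datagram or
 *	ip_flush_pending_frames() to discard the queued data on error.
 */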
762int ip_append_data(struct sock *sk,
763		   int getfrag(void *from, char *to, int offset, int len,
764			       int odd, struct sk_buff *skb),
765		   void *from, int length, int transhdrlen,
766		   struct ipcm_cookie *ipc, struct rtable *rt,
767		   unsigned int flags)
768{
769	struct inet_sock *inet = inet_sk(sk);
770	struct sk_buff *skb;
771
772	struct ip_options *opt = NULL;
773	int hh_len;
774	int exthdrlen;
775	int mtu;
776	int copy;
777	int err;
778	int offset = 0;
779	unsigned int maxfraglen, fragheaderlen;
780	int csummode = CHECKSUM_NONE;
781
782	if (flags&MSG_PROBE)
783		return 0;
784
785	if (skb_queue_empty(&sk->sk_write_queue)) {
786		/*
787		 * setup for corking.
788		 */
789		opt = ipc->opt;
790		if (opt) {
791			if (inet->cork.opt == NULL) {
792				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
793				if (unlikely(inet->cork.opt == NULL))
794					return -ENOBUFS;
795			}
796			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
797			inet->cork.flags |= IPCORK_OPT;
798			inet->cork.addr = ipc->addr;
799		}
800		dst_hold(&rt->u.dst);
801		inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
802		inet->cork.rt = rt;
803		inet->cork.length = 0;
804		sk->sk_sndmsg_page = NULL;
805		sk->sk_sndmsg_off = 0;
806		if ((exthdrlen = rt->u.dst.header_len) != 0) {
807			length += exthdrlen;
808			transhdrlen += exthdrlen;
809		}
810	} else {
811		rt = inet->cork.rt;
812		if (inet->cork.flags & IPCORK_OPT)
813			opt = inet->cork.opt;
814
815		transhdrlen = 0;
816		exthdrlen = 0;
817		mtu = inet->cork.fragsize;
818	}
819	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
820
821	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
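	/* maxfraglen is the largest frame we may generate: every fragment but
	 * the last must carry a payload that is a multiple of 8 bytes, so
	 * round the payload space down and add the header length back.
	 */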
822	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
823
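	/* tot_len is a 16-bit field, so the corked data may not grow beyond
	 * 64KiB minus the header.
	 */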
824	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
825		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
826		return -EMSGSIZE;
827	}
828
829	/*
830	 * transhdrlen > 0 means that this is the first fragment and we wish
831	 * it not to be fragmented later.
832	 */
833	if (transhdrlen &&
834	    length + fragheaderlen <= mtu &&
835	    rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
836	    !exthdrlen)
837		csummode = CHECKSUM_PARTIAL;
838
839	inet->cork.length += length;
840	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
841			(rt->u.dst.dev->features & NETIF_F_UFO)) {
842
843		err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
844					 fragheaderlen, transhdrlen, mtu,
845					 flags);
846		if (err)
847			goto error;
848		return 0;
849	}
850
851	/* So, what's going on in the loop below?
852	 *
853	 * We use the calculated fragment length to generate a chain of skbs;
854	 * each of them is an IP fragment ready for sending to the network
855	 * once the appropriate IP header has been added.
856	 */
857
858	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
859		goto alloc_new_skb;
860
861	while (length > 0) {
862		/* Check if the remaining data fits into current packet. */
863		copy = mtu - skb->len;
864		if (copy < length)
865			copy = maxfraglen - skb->len;
866		if (copy <= 0) {
867			char *data;
868			unsigned int datalen;
869			unsigned int fraglen;
870			unsigned int fraggap;
871			unsigned int alloclen;
872			struct sk_buff *skb_prev;
873alloc_new_skb:
874			skb_prev = skb;
875			if (skb_prev)
876				fraggap = skb_prev->len - maxfraglen;
877			else
878				fraggap = 0;
879
880			/*
881			 * If remaining data exceeds the mtu,
882			 * we know we need more fragment(s).
883			 */
884			datalen = length + fraggap;
885			if (datalen > mtu - fragheaderlen)
886				datalen = maxfraglen - fragheaderlen;
887			fraglen = datalen + fragheaderlen;
888
889			if ((flags & MSG_MORE) &&
890			    !(rt->u.dst.dev->features&NETIF_F_SG))
891				alloclen = mtu;
892			else
893				alloclen = datalen + fragheaderlen;
894
895			/* The last fragment gets additional space at tail.
896			 * Note: with MSG_MORE we overallocate on fragments,
897			 * because we have no idea which fragment will be
898			 * the last.
899			 */
900			if (datalen == length + fraggap)
901				alloclen += rt->u.dst.trailer_len;
902
903			if (transhdrlen) {
904				skb = sock_alloc_send_skb(sk,
905						alloclen + hh_len + 15,
906						(flags & MSG_DONTWAIT), &err);
907			} else {
908				skb = NULL;
909				if (atomic_read(&sk->sk_wmem_alloc) <=
910				    2 * sk->sk_sndbuf)
911					skb = sock_wmalloc(sk,
912							   alloclen + hh_len + 15, 1,
913							   sk->sk_allocation);
914				if (unlikely(skb == NULL))
915					err = -ENOBUFS;
916			}
917			if (skb == NULL)
918				goto error;
919
920			/*
921			 *	Fill in the control structures
922			 */
923			skb->ip_summed = csummode;
924			skb->csum = 0;
925			skb_reserve(skb, hh_len);
926
927			/*
928			 *	Find where to start putting bytes.
929			 */
930			data = skb_put(skb, fraglen);
931			skb->nh.raw = data + exthdrlen;
932			data += fragheaderlen;
933			skb->h.raw = data + exthdrlen;
934
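			/* If the previous skb ran past maxfraglen, move the
			 * excess (fraggap) bytes into this fragment, fix up
			 * both checksums and trim the previous skb back to
			 * maxfraglen.
			 */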
935			if (fraggap) {
936				skb->csum = skb_copy_and_csum_bits(
937					skb_prev, maxfraglen,
938					data + transhdrlen, fraggap, 0);
939				skb_prev->csum = csum_sub(skb_prev->csum,
940							  skb->csum);
941				data += fraggap;
942				pskb_trim_unique(skb_prev, maxfraglen);
943			}
944
945			copy = datalen - transhdrlen - fraggap;
946			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
947				err = -EFAULT;
948				kfree_skb(skb);
949				goto error;
950			}
951
952			offset += copy;
953			length -= datalen - fraggap;
954			transhdrlen = 0;
955			exthdrlen = 0;
956			csummode = CHECKSUM_NONE;
957
958			/*
959			 * Put the packet on the pending queue.
960			 */
961			__skb_queue_tail(&sk->sk_write_queue, skb);
962			continue;
963		}
964
965		if (copy > length)
966			copy = length;
967
968		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
969			unsigned int off;
970
971			off = skb->len;
972			if (getfrag(from, skb_put(skb, copy),
973					offset, copy, off, skb) < 0) {
974				__skb_trim(skb, off);
975				err = -EFAULT;
976				goto error;
977			}
978		} else {
979			int i = skb_shinfo(skb)->nr_frags;
980			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
981			struct page *page = sk->sk_sndmsg_page;
982			int off = sk->sk_sndmsg_off;
983			unsigned int left;
984
985			if (page && (left = PAGE_SIZE - off) > 0) {
986				if (copy >= left)
987					copy = left;
988				if (page != frag->page) {
989					if (i == MAX_SKB_FRAGS) {
990						err = -EMSGSIZE;
991						goto error;
992					}
993					get_page(page);
994					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
995					frag = &skb_shinfo(skb)->frags[i];
996				}
997			} else if (i < MAX_SKB_FRAGS) {
998				if (copy > PAGE_SIZE)
999					copy = PAGE_SIZE;
1000				page = alloc_pages(sk->sk_allocation, 0);
1001				if (page == NULL)  {
1002					err = -ENOMEM;
1003					goto error;
1004				}
1005				sk->sk_sndmsg_page = page;
1006				sk->sk_sndmsg_off = 0;
1007
1008				skb_fill_page_desc(skb, i, page, 0, 0);
1009				frag = &skb_shinfo(skb)->frags[i];
1010				skb->truesize += PAGE_SIZE;
1011				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1012			} else {
1013				err = -EMSGSIZE;
1014				goto error;
1015			}
1016			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1017				err = -EFAULT;
1018				goto error;
1019			}
1020			sk->sk_sndmsg_off += copy;
1021			frag->size += copy;
1022			skb->len += copy;
1023			skb->data_len += copy;
1024		}
1025		offset += copy;
1026		length -= copy;
1027	}
1028
1029	return 0;
1030
1031error:
1032	inet->cork.length -= length;
1033	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1034	return err;
1035}
1036
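/*
 *	Append data from a page to the pending datagram on the socket.  The
 *	output device must support scatter-gather, and a datagram must
 *	already have been started with ip_append_data().
 */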
1037ssize_t	ip_append_page(struct sock *sk, struct page *page,
1038		       int offset, size_t size, int flags)
1039{
1040	struct inet_sock *inet = inet_sk(sk);
1041	struct sk_buff *skb;
1042	struct rtable *rt;
1043	struct ip_options *opt = NULL;
1044	int hh_len;
1045	int mtu;
1046	int len;
1047	int err;
1048	unsigned int maxfraglen, fragheaderlen, fraggap;
1049
1050	if (inet->hdrincl)
1051		return -EPERM;
1052
1053	if (flags&MSG_PROBE)
1054		return 0;
1055
1056	if (skb_queue_empty(&sk->sk_write_queue))
1057		return -EINVAL;
1058
1059	rt = inet->cork.rt;
1060	if (inet->cork.flags & IPCORK_OPT)
1061		opt = inet->cork.opt;
1062
1063	if (!(rt->u.dst.dev->features&NETIF_F_SG))
1064		return -EOPNOTSUPP;
1065
1066	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1067	mtu = inet->cork.fragsize;
1068
1069	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
1070	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
1071
1072	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
1073		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
1074		return -EMSGSIZE;
1075	}
1076
1077	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1078		return -EINVAL;
1079
1080	inet->cork.length += size;
1081	if ((sk->sk_protocol == IPPROTO_UDP) &&
1082	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
1083		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
1084		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1085	}
1086
1087
1088	while (size > 0) {
1089		int i;
1090
1091		if (skb_is_gso(skb))
1092			len = size;
1093		else {
1094
1095			/* Check if the remaining data fits into current packet. */
1096			len = mtu - skb->len;
1097			if (len < size)
1098				len = maxfraglen - skb->len;
1099		}
1100		if (len <= 0) {
1101			struct sk_buff *skb_prev;
1102			char *data;
1103			struct iphdr *iph;
1104			int alloclen;
1105
1106			skb_prev = skb;
1107			fraggap = skb_prev->len - maxfraglen;
1108
1109			alloclen = fragheaderlen + hh_len + fraggap + 15;
1110			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
1111			if (unlikely(!skb)) {
1112				err = -ENOBUFS;
1113				goto error;
1114			}
1115
1116			/*
1117			 *	Fill in the control structures
1118			 */
1119			skb->ip_summed = CHECKSUM_NONE;
1120			skb->csum = 0;
1121			skb_reserve(skb, hh_len);
1122
1123			/*
1124			 *	Find where to start putting bytes.
1125			 */
1126			data = skb_put(skb, fragheaderlen + fraggap);
1127			skb_reset_network_header(skb);
1128			iph = skb->nh.iph;
1129			data += fragheaderlen;
1130			skb->h.raw = data;
1131
1132			if (fraggap) {
1133				skb->csum = skb_copy_and_csum_bits(
1134					skb_prev, maxfraglen,
1135					data, fraggap, 0);
1136				skb_prev->csum = csum_sub(skb_prev->csum,
1137							  skb->csum);
1138				pskb_trim_unique(skb_prev, maxfraglen);
1139			}
1140
1141			/*
1142			 * Put the packet on the pending queue.
1143			 */
1144			__skb_queue_tail(&sk->sk_write_queue, skb);
1145			continue;
1146		}
1147
1148		i = skb_shinfo(skb)->nr_frags;
1149		if (len > size)
1150			len = size;
1151		if (skb_can_coalesce(skb, i, page, offset)) {
1152			skb_shinfo(skb)->frags[i-1].size += len;
1153		} else if (i < MAX_SKB_FRAGS) {
1154			get_page(page);
1155			skb_fill_page_desc(skb, i, page, offset, len);
1156		} else {
1157			err = -EMSGSIZE;
1158			goto error;
1159		}
1160
1161		if (skb->ip_summed == CHECKSUM_NONE) {
1162			__wsum csum;
1163			csum = csum_page(page, offset, len);
1164			skb->csum = csum_block_add(skb->csum, csum, skb->len);
1165		}
1166
1167		skb->len += len;
1168		skb->data_len += len;
1169		offset += len;
1170		size -= len;
1171	}
1172	return 0;
1173
1174error:
1175	inet->cork.length -= size;
1176	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1177	return err;
1178}
1179
1180/*
1181 *	Combine all pending IP fragments on the socket into one IP datagram
1182 *	and push it out.
1183 */
1184int ip_push_pending_frames(struct sock *sk)
1185{
1186	struct sk_buff *skb, *tmp_skb;
1187	struct sk_buff **tail_skb;
1188	struct inet_sock *inet = inet_sk(sk);
1189	struct ip_options *opt = NULL;
1190	struct rtable *rt = inet->cork.rt;
1191	struct iphdr *iph;
1192	__be16 df = 0;
1193	__u8 ttl;
1194	int err = 0;
1195
1196	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1197		goto out;
1198	tail_skb = &(skb_shinfo(skb)->frag_list);
1199
1200	/* move skb->data to ip header from ext header */
1201	if (skb->data < skb->nh.raw)
1202		__skb_pull(skb, skb_network_offset(skb));
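	/* Chain the remaining queued skbs onto the first skb's frag_list so
	 * the whole datagram can be handed down as a single skb; length
	 * accounting moves to the head skb and the fragments give up their
	 * socket ownership.
	 */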
1203	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1204		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1205		*tail_skb = tmp_skb;
1206		tail_skb = &(tmp_skb->next);
1207		skb->len += tmp_skb->len;
1208		skb->data_len += tmp_skb->len;
1209		skb->truesize += tmp_skb->truesize;
1210		__sock_put(tmp_skb->sk);
1211		tmp_skb->destructor = NULL;
1212		tmp_skb->sk = NULL;
1213	}
1214
1215	/* Unless the user demanded real pmtu discovery (IP_PMTUDISC_DO), we
1216	 * allow the frame generated here to be fragmented.  No matter how
1217	 * transforms change the size of the packet, it will still come out.
1218	 */
1219	if (inet->pmtudisc != IP_PMTUDISC_DO)
1220		skb->local_df = 1;
1221
1222	/* The DF bit is set when we want to see DF on outgoing frames.
1223	 * If local_df is set too, we still allow this frame to be
1224	 * fragmented locally. */
1225	if (inet->pmtudisc == IP_PMTUDISC_DO ||
1226	    (skb->len <= dst_mtu(&rt->u.dst) &&
1227	     ip_dont_fragment(sk, &rt->u.dst)))
1228		df = htons(IP_DF);
1229
1230	if (inet->cork.flags & IPCORK_OPT)
1231		opt = inet->cork.opt;
1232
1233	if (rt->rt_type == RTN_MULTICAST)
1234		ttl = inet->mc_ttl;
1235	else
1236		ttl = ip_select_ttl(inet, &rt->u.dst);
1237
1238	iph = (struct iphdr *)skb->data;
1239	iph->version = 4;
1240	iph->ihl = 5;
1241	if (opt) {
1242		iph->ihl += opt->optlen>>2;
1243		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
1244	}
1245	iph->tos = inet->tos;
1246	iph->tot_len = htons(skb->len);
1247	iph->frag_off = df;
1248	ip_select_ident(iph, &rt->u.dst, sk);
1249	iph->ttl = ttl;
1250	iph->protocol = sk->sk_protocol;
1251	iph->saddr = rt->rt_src;
1252	iph->daddr = rt->rt_dst;
1253	ip_send_check(iph);
1254
1255	skb->priority = sk->sk_priority;
1256	skb->dst = dst_clone(&rt->u.dst);
1257
1258	/* Netfilter gets the whole, not yet fragmented skb. */
1259	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
1260		      skb->dst->dev, dst_output);
1261	if (err) {
1262		if (err > 0)
1263			err = inet->recverr ? net_xmit_errno(err) : 0;
1264		if (err)
1265			goto error;
1266	}
1267
1268out:
1269	inet->cork.flags &= ~IPCORK_OPT;
1270	kfree(inet->cork.opt);
1271	inet->cork.opt = NULL;
1272	if (inet->cork.rt) {
1273		ip_rt_put(inet->cork.rt);
1274		inet->cork.rt = NULL;
1275	}
1276	return err;
1277
1278error:
1279	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1280	goto out;
1281}
1282
1283/*
1284 *	Throw away all pending data on the socket.
1285 */
1286void ip_flush_pending_frames(struct sock *sk)
1287{
1288	struct inet_sock *inet = inet_sk(sk);
1289	struct sk_buff *skb;
1290
1291	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
1292		kfree_skb(skb);
1293
1294	inet->cork.flags &= ~IPCORK_OPT;
1295	kfree(inet->cork.opt);
1296	inet->cork.opt = NULL;
1297	if (inet->cork.rt) {
1298		ip_rt_put(inet->cork.rt);
1299		inet->cork.rt = NULL;
1300	}
1301}
1302
1303
1304/*
1305 *	Fetch data from kernel space and fill in checksum if needed.
1306 */
1307static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1308			      int len, int odd, struct sk_buff *skb)
1309{
1310	__wsum csum;
1311
1312	csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
1313	skb->csum = csum_block_add(skb->csum, csum, odd);
1314	return 0;
1315}
1316
1317/*
1318 *	Generic function to send a packet as reply to another packet.
1319 *	Used to send TCP resets so far. ICMP should use this function too.
1320 *
1321 *	Should run single threaded per socket because it uses the sock
1322 *	structure to pass arguments.
1323 *
1324 *	LATER: switch from ip_build_xmit to ip_append_*
1325 */
1326void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
1327		   unsigned int len)
1328{
1329	struct inet_sock *inet = inet_sk(sk);
1330	struct {
1331		struct ip_options	opt;
1332		char			data[40];
1333	} replyopts;
1334	struct ipcm_cookie ipc;
1335	__be32 daddr;
1336	struct rtable *rt = (struct rtable*)skb->dst;
1337
1338	if (ip_options_echo(&replyopts.opt, skb))
1339		return;
1340
1341	daddr = ipc.addr = rt->rt_src;
1342	ipc.opt = NULL;
1343
1344	if (replyopts.opt.optlen) {
1345		ipc.opt = &replyopts.opt;
1346
1347		if (ipc.opt->srr)
1348			daddr = replyopts.opt.faddr;
1349	}
1350
1351	{
1352		struct flowi fl = { .nl_u = { .ip4_u =
1353					      { .daddr = daddr,
1354						.saddr = rt->rt_spec_dst,
1355						.tos = RT_TOS(skb->nh.iph->tos) } },
1356				    /* Not quite clean, but right. */
1357				    .uli_u = { .ports =
1358					       { .sport = skb->h.th->dest,
1359						 .dport = skb->h.th->source } },
1360				    .proto = sk->sk_protocol };
1361		security_skb_classify_flow(skb, &fl);
1362		if (ip_route_output_key(&rt, &fl))
1363			return;
1364	}
1365
1366	/* And let IP do all the hard work.
1367
1368	   This chunk is not reentrant, hence the spinlock.
1369	   Note that it relies on the fact that this function is called
1370	   with BHs disabled locally and that sk cannot already be locked.
1371	 */
1372	bh_lock_sock(sk);
1373	inet->tos = skb->nh.iph->tos;
1374	sk->sk_priority = skb->priority;
1375	sk->sk_protocol = skb->nh.iph->protocol;
1376	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1377		       &ipc, rt, MSG_DONTWAIT);
1378	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1379		if (arg->csumoffset >= 0)
1380			*((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
1381		skb->ip_summed = CHECKSUM_NONE;
1382		ip_push_pending_frames(sk);
1383	}
1384
1385	bh_unlock_sock(sk);
1386
1387	ip_rt_put(rt);
1388}
1389
1390void __init ip_init(void)
1391{
1392	ip_rt_init();
1393	inet_initpeers();
1394
1395#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
1396	igmp_mc_proc_init();
1397#endif
1398}
1399
1400EXPORT_SYMBOL(ip_generic_getfrag);
1401EXPORT_SYMBOL(ip_queue_xmit);
1402EXPORT_SYMBOL(ip_send_check);
1403