ip6_output.c revision 53b7997fd5c62408d10b9aafb38974ce90fd2356
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

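/*
 * Pick the Identification value for a new Fragment header: a global
 * counter, serialized by a spinlock, that skips zero on wraparound.
 */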
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
	static u32 ipv6_fragmentation_id = 1;
	static DEFINE_SPINLOCK(ip6_id_lock);

	spin_lock_bh(&ip6_id_lock);
	fhdr->identification = htonl(ipv6_fragmentation_id);
	if (++ipv6_fragmentation_id == 0)
		ipv6_fragmentation_id = 1;
	spin_unlock_bh(&ip6_id_lock);
}

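/*
 * Fix up payload_len (zero when the payload would exceed IPV6_MAXPLEN,
 * the jumbogram case) and run the packet through NF_INET_LOCAL_OUT.
 */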
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
		       dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

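/*
 * Hand the packet to the neighbour layer: use the cached hardware
 * header if there is one, else the neighbour's output function.
 * Without either, the packet cannot be transmitted.
 */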
static int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	BUG_TRAP(newskb->dst);

	netif_rx(newskb);
	return 0;
}

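/*
 * For multicast destinations, loop a clone of the packet back to local
 * listeners (unless the sender disabled IPV6_MULTICAST_LOOP) and drop
 * hop_limit==0 packets instead of putting them on the wire; everything
 * else leaves through the NF_INET_POST_ROUTING hook.
 */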
static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
	}

	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}

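/*
 * Path MTU to honour for this skb: the device MTU when the socket does
 * its own MTU probing (IPV6_PMTUDISC_PROBE), else the dst's cached MTU.
 */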
static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb->dst->dev->mtu : dst_mtu(skb->dst);
}

int ip6_output(struct sk_buff *skb)
{
	struct inet6_dev *idev = ip6_dst_idev(skb->dst);
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb->dst))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}

/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8  proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now);
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(ip6_dst_idev(skb->dst),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */

	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_OUTREQUESTS);
		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	for us is performance critical)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

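/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * registered for that RA value, cloning it for all but the last match.
 * Returns 1 if the packet was consumed, 0 otherwise.
 */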
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

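/*
 * Decide what to do with a packet that arrived for a proxied address:
 * 1 means hand it to local input (neighbour discovery), 0 means
 * forward it, -1 means drop (link-local destination).
 */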
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* A unicast neighbour discovery message destined
			 * to the proxied address must be passed to the
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

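/*
 * Forward a received packet: validate it (forwarding enabled, xfrm
 * policy, hop limit), honour Router Alert and NDISC proxying, emit a
 * Redirect or Packet Too Big where appropriate, then decrement the hop
 * limit and push the packet out through NF_INET_FORWARD.
 */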
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not the end node, so if the packet contains
	 *	AH/ESP, we cannot do anything.
	 *	Defragmentation would also be a mistake; RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement hop limit
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb->dst;

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb->sp) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

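/*
 * Copy per-packet metadata (type, priority, dst, marks, conntrack and
 * security state) from the original skb onto a freshly built fragment.
 */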
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

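/*
 * Walk the extension header chain to find where a Fragment header must
 * be inserted: after any hop-by-hop, routing, or (for the MIPv6 home
 * address option) destination headers. Returns the byte offset and
 * points *nexthdr at the preceding Next Header field.
 */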
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct net_device *dev;
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb->dst;
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	dev = rt->u.dst.dev;
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb was not generated by a local socket.  (This last
	 * check should be redundant, but it's free.)
	 */
	if (!skb->local_df) {
		skb->dev = skb->dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

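	/* Fast path: the skb already arrived as a chain of properly
	 * sized pieces (frag_list), so just prepend a copy of the
	 * unfragmentable headers plus a Fragment header to each piece
	 * instead of copying any payload.
	 */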
	if (skb_shinfo(skb)->frag_list) {
		int first_len = skb_pagelen(skb);
		int truesizes = 0;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				truesizes += frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(skb, fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->truesize -= truesizes;
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare the header of the next frame,
			 * before the previous one goes down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

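	/* Slow path: allocate a fresh skb for every fragment and copy
	 * the headers and a block of payload into each one.
	 */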
slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_ALLOCATED_SPACE(rt->u.dst.dev),
				      GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(skb, fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

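/*
 * Nonzero when the cached route can no longer be trusted for this flow
 * address: it is not a matching host route and the saved last-used
 * address does not match either.
 */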
static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not-connected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route,
	 *    check that the cached destination is current.
	 *    If it is a network route, we still may
	 *    check its validity using the saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save the whole address now,
	 *    (because the main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

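/*
 * Common tail of the dst lookups: do the route lookup if the caller
 * did not already have a dst, pick a source address if the flow has
 * none, and (with optimistic DAD) fall back to the default router's
 * dst while the next-hop neighbour is still unresolved.
 */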
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

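/*
 * UDP fragmentation offload: build one oversized skb holding the whole
 * datagram and let the device segment it; gso_size tells the device
 * how much payload fits in each fragment.
 */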
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by the network
	 * device, so create one single skb packet containing the complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
					    sizeof(struct frag_hdr);
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(skb, &fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow the normal path
	 */
	kfree_skb(skb);

	return err;
}

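/*
 * Append data to the per-socket write queue ("corking"): the first
 * call records options, route, and MTU in the cork state; data is then
 * packed into mtu-sized skbs, each payload block aligned to 8 bytes so
 * that ip6_push_pending_frames() can later fragment cleanly.
 */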
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (np->cork.opt == NULL) {
				np->cork.opt = kmalloc(opt->tot_len,
						       sk->sk_allocation);
				if (unlikely(np->cork.opt == NULL))
					return -ENOBUFS;
			} else if (np->cork.opt->tot_len < opt->tot_len) {
				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
				return -EINVAL;
			}
			memcpy(np->cork.opt, opt, opt->tot_len);
			inet->cork.flags |= IPCORK_OPT;
			/* need source address above --miyazawa */
		}
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		if (inet->cork.flags & IPCORK_OPT)
			opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into the current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

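/*
 * Drop the cork state: free the copied options and release the cached
 * route and flow.
 */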
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(np->cork.opt);
	np->cork.opt = NULL;
	if (inet->cork.dst) {
		dst_release(inet->cork.dst);
		inet->cork.dst = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

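/*
 * Coalesce everything queued by ip6_append_data() into one datagram
 * (chained via frag_list), prepend the IPv6 header and any options,
 * and send it with ip6_local_out(); fragmentation, if needed, happens
 * later in ip6_output().
 */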
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	goto out;
}

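/*
 * Throw away everything queued on the socket since the last push and
 * release the cork state.
 */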
void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb->dst)
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}