/* ip6_output.c — upstream revision 5110effee8fde2edfacac9cd12a9960ab2dc39ea */
1/* 2 * IPv6 output functions 3 * Linux INET6 implementation 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * Based on linux/net/ipv4/ip_output.c 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 * 15 * Changes: 16 * A.N.Kuznetsov : airthmetics in fragmentation. 17 * extension headers are implemented. 18 * route changes now work. 19 * ip6_forward does not confuse sniffers. 20 * etc. 21 * 22 * H. von Brand : Added missing #include <linux/string.h> 23 * Imran Patel : frag id should be in NBO 24 * Kazunori MIYAZAWA @USAGI 25 * : add ip6_append_data and related functions 26 * for datagram xmit 27 */ 28 29#include <linux/errno.h> 30#include <linux/kernel.h> 31#include <linux/string.h> 32#include <linux/socket.h> 33#include <linux/net.h> 34#include <linux/netdevice.h> 35#include <linux/if_arp.h> 36#include <linux/in6.h> 37#include <linux/tcp.h> 38#include <linux/route.h> 39#include <linux/module.h> 40#include <linux/slab.h> 41 42#include <linux/netfilter.h> 43#include <linux/netfilter_ipv6.h> 44 45#include <net/sock.h> 46#include <net/snmp.h> 47 48#include <net/ipv6.h> 49#include <net/ndisc.h> 50#include <net/protocol.h> 51#include <net/ip6_route.h> 52#include <net/addrconf.h> 53#include <net/rawv6.h> 54#include <net/icmp.h> 55#include <net/xfrm.h> 56#include <net/checksum.h> 57#include <linux/mroute6.h> 58 59int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); 60 61int __ip6_local_out(struct sk_buff *skb) 62{ 63 int len; 64 65 len = skb->len - sizeof(struct ipv6hdr); 66 if (len > IPV6_MAXPLEN) 67 len = 0; 68 ipv6_hdr(skb)->payload_len = htons(len); 69 70 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, 71 skb_dst(skb)->dev, dst_output); 72} 73 74int ip6_local_out(struct sk_buff *skb) 75{ 76 int err; 77 78 err = 
__ip6_local_out(skb); 79 if (likely(err == 1)) 80 err = dst_output(skb); 81 82 return err; 83} 84EXPORT_SYMBOL_GPL(ip6_local_out); 85 86static int ip6_finish_output2(struct sk_buff *skb) 87{ 88 struct dst_entry *dst = skb_dst(skb); 89 struct net_device *dev = dst->dev; 90 struct neighbour *neigh; 91 92 skb->protocol = htons(ETH_P_IPV6); 93 skb->dev = dev; 94 95 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { 96 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 97 98 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && 99 ((mroute6_socket(dev_net(dev), skb) && 100 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || 101 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, 102 &ipv6_hdr(skb)->saddr))) { 103 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 104 105 /* Do not check for IFF_ALLMULTI; multicast routing 106 is not supported in any case. 107 */ 108 if (newskb) 109 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, 110 newskb, NULL, newskb->dev, 111 dev_loopback_xmit); 112 113 if (ipv6_hdr(skb)->hop_limit == 0) { 114 IP6_INC_STATS(dev_net(dev), idev, 115 IPSTATS_MIB_OUTDISCARDS); 116 kfree_skb(skb); 117 return 0; 118 } 119 } 120 121 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, 122 skb->len); 123 } 124 125 rcu_read_lock(); 126 neigh = dst_get_neighbour_noref(dst); 127 if (neigh) { 128 int res = dst_neigh_output(dst, neigh, skb); 129 130 rcu_read_unlock(); 131 return res; 132 } 133 rcu_read_unlock(); 134 IP6_INC_STATS_BH(dev_net(dst->dev), 135 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 136 kfree_skb(skb); 137 return -EINVAL; 138} 139 140static int ip6_finish_output(struct sk_buff *skb) 141{ 142 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || 143 dst_allfrag(skb_dst(skb))) 144 return ip6_fragment(skb, ip6_finish_output2); 145 else 146 return ip6_finish_output2(skb); 147} 148 149int ip6_output(struct sk_buff *skb) 150{ 151 struct net_device *dev = skb_dst(skb)->dev; 152 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 153 if 
(unlikely(idev->cnf.disable_ipv6)) { 154 IP6_INC_STATS(dev_net(dev), idev, 155 IPSTATS_MIB_OUTDISCARDS); 156 kfree_skb(skb); 157 return 0; 158 } 159 160 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, 161 ip6_finish_output, 162 !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 163} 164 165/* 166 * xmit an sk_buff (used by TCP, SCTP and DCCP) 167 */ 168 169int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, 170 struct ipv6_txoptions *opt, int tclass) 171{ 172 struct net *net = sock_net(sk); 173 struct ipv6_pinfo *np = inet6_sk(sk); 174 struct in6_addr *first_hop = &fl6->daddr; 175 struct dst_entry *dst = skb_dst(skb); 176 struct ipv6hdr *hdr; 177 u8 proto = fl6->flowi6_proto; 178 int seg_len = skb->len; 179 int hlimit = -1; 180 u32 mtu; 181 182 if (opt) { 183 unsigned int head_room; 184 185 /* First: exthdrs may take lots of space (~8K for now) 186 MAX_HEADER is not enough. 187 */ 188 head_room = opt->opt_nflen + opt->opt_flen; 189 seg_len += head_room; 190 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); 191 192 if (skb_headroom(skb) < head_room) { 193 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); 194 if (skb2 == NULL) { 195 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 196 IPSTATS_MIB_OUTDISCARDS); 197 kfree_skb(skb); 198 return -ENOBUFS; 199 } 200 consume_skb(skb); 201 skb = skb2; 202 skb_set_owner_w(skb, sk); 203 } 204 if (opt->opt_flen) 205 ipv6_push_frag_opts(skb, opt, &proto); 206 if (opt->opt_nflen) 207 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); 208 } 209 210 skb_push(skb, sizeof(struct ipv6hdr)); 211 skb_reset_network_header(skb); 212 hdr = ipv6_hdr(skb); 213 214 /* 215 * Fill in the IPv6 header 216 */ 217 if (np) 218 hlimit = np->hop_limit; 219 if (hlimit < 0) 220 hlimit = ip6_dst_hoplimit(dst); 221 222 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel; 223 224 hdr->payload_len = htons(seg_len); 225 hdr->nexthdr = proto; 226 hdr->hop_limit = hlimit; 227 228 
hdr->saddr = fl6->saddr; 229 hdr->daddr = *first_hop; 230 231 skb->priority = sk->sk_priority; 232 skb->mark = sk->sk_mark; 233 234 mtu = dst_mtu(dst); 235 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { 236 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), 237 IPSTATS_MIB_OUT, skb->len); 238 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, 239 dst->dev, dst_output); 240 } 241 242 net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n"); 243 skb->dev = dst->dev; 244 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 245 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); 246 kfree_skb(skb); 247 return -EMSGSIZE; 248} 249 250EXPORT_SYMBOL(ip6_xmit); 251 252/* 253 * To avoid extra problems ND packets are send through this 254 * routine. It's code duplication but I really want to avoid 255 * extra checks since ipv6_build_header is used by TCP (which 256 * is for us performance critical) 257 */ 258 259int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, 260 const struct in6_addr *saddr, const struct in6_addr *daddr, 261 int proto, int len) 262{ 263 struct ipv6_pinfo *np = inet6_sk(sk); 264 struct ipv6hdr *hdr; 265 266 skb->protocol = htons(ETH_P_IPV6); 267 skb->dev = dev; 268 269 skb_reset_network_header(skb); 270 skb_put(skb, sizeof(struct ipv6hdr)); 271 hdr = ipv6_hdr(skb); 272 273 *(__be32*)hdr = htonl(0x60000000); 274 275 hdr->payload_len = htons(len); 276 hdr->nexthdr = proto; 277 hdr->hop_limit = np->hop_limit; 278 279 hdr->saddr = *saddr; 280 hdr->daddr = *daddr; 281 282 return 0; 283} 284 285static int ip6_call_ra_chain(struct sk_buff *skb, int sel) 286{ 287 struct ip6_ra_chain *ra; 288 struct sock *last = NULL; 289 290 read_lock(&ip6_ra_lock); 291 for (ra = ip6_ra_chain; ra; ra = ra->next) { 292 struct sock *sk = ra->sk; 293 if (sk && ra->sel == sel && 294 (!sk->sk_bound_dev_if || 295 sk->sk_bound_dev_if == skb->dev->ifindex)) { 296 if (last) { 297 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 
298 if (skb2) 299 rawv6_rcv(last, skb2); 300 } 301 last = sk; 302 } 303 } 304 305 if (last) { 306 rawv6_rcv(last, skb); 307 read_unlock(&ip6_ra_lock); 308 return 1; 309 } 310 read_unlock(&ip6_ra_lock); 311 return 0; 312} 313 314static int ip6_forward_proxy_check(struct sk_buff *skb) 315{ 316 struct ipv6hdr *hdr = ipv6_hdr(skb); 317 u8 nexthdr = hdr->nexthdr; 318 __be16 frag_off; 319 int offset; 320 321 if (ipv6_ext_hdr(nexthdr)) { 322 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); 323 if (offset < 0) 324 return 0; 325 } else 326 offset = sizeof(struct ipv6hdr); 327 328 if (nexthdr == IPPROTO_ICMPV6) { 329 struct icmp6hdr *icmp6; 330 331 if (!pskb_may_pull(skb, (skb_network_header(skb) + 332 offset + 1 - skb->data))) 333 return 0; 334 335 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); 336 337 switch (icmp6->icmp6_type) { 338 case NDISC_ROUTER_SOLICITATION: 339 case NDISC_ROUTER_ADVERTISEMENT: 340 case NDISC_NEIGHBOUR_SOLICITATION: 341 case NDISC_NEIGHBOUR_ADVERTISEMENT: 342 case NDISC_REDIRECT: 343 /* For reaction involving unicast neighbor discovery 344 * message destined to the proxied address, pass it to 345 * input function. 346 */ 347 return 1; 348 default: 349 break; 350 } 351 } 352 353 /* 354 * The proxying router can't forward traffic sent to a link-local 355 * address, so signal the sender and discard the packet. This 356 * behavior is clarified by the MIPv6 specification. 
357 */ 358 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { 359 dst_link_failure(skb); 360 return -1; 361 } 362 363 return 0; 364} 365 366static inline int ip6_forward_finish(struct sk_buff *skb) 367{ 368 return dst_output(skb); 369} 370 371int ip6_forward(struct sk_buff *skb) 372{ 373 struct dst_entry *dst = skb_dst(skb); 374 struct ipv6hdr *hdr = ipv6_hdr(skb); 375 struct inet6_skb_parm *opt = IP6CB(skb); 376 struct net *net = dev_net(dst->dev); 377 u32 mtu; 378 379 if (net->ipv6.devconf_all->forwarding == 0) 380 goto error; 381 382 if (skb_warn_if_lro(skb)) 383 goto drop; 384 385 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { 386 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 387 goto drop; 388 } 389 390 if (skb->pkt_type != PACKET_HOST) 391 goto drop; 392 393 skb_forward_csum(skb); 394 395 /* 396 * We DO NOT make any processing on 397 * RA packets, pushing them to user level AS IS 398 * without ane WARRANTY that application will be able 399 * to interpret them. The reason is that we 400 * cannot make anything clever here. 401 * 402 * We are not end-node, so that if packet contains 403 * AH/ESP, we cannot make anything. 404 * Defragmentation also would be mistake, RA packets 405 * cannot be fragmented, because there is no warranty 406 * that different fragments will go along one path. --ANK 407 */ 408 if (opt->ra) { 409 u8 *ptr = skb_network_header(skb) + opt->ra; 410 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) 411 return 0; 412 } 413 414 /* 415 * check and decrement ttl 416 */ 417 if (hdr->hop_limit <= 1) { 418 /* Force OUTPUT device used as source address */ 419 skb->dev = dst->dev; 420 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); 421 IP6_INC_STATS_BH(net, 422 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); 423 424 kfree_skb(skb); 425 return -ETIMEDOUT; 426 } 427 428 /* XXX: idev->cnf.proxy_ndp? 
*/ 429 if (net->ipv6.devconf_all->proxy_ndp && 430 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { 431 int proxied = ip6_forward_proxy_check(skb); 432 if (proxied > 0) 433 return ip6_input(skb); 434 else if (proxied < 0) { 435 IP6_INC_STATS(net, ip6_dst_idev(dst), 436 IPSTATS_MIB_INDISCARDS); 437 goto drop; 438 } 439 } 440 441 if (!xfrm6_route_forward(skb)) { 442 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 443 goto drop; 444 } 445 dst = skb_dst(skb); 446 447 /* IPv6 specs say nothing about it, but it is clear that we cannot 448 send redirects to source routed frames. 449 We don't send redirects to frames decapsulated from IPsec. 450 */ 451 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) { 452 struct in6_addr *target = NULL; 453 struct inet_peer *peer; 454 struct rt6_info *rt; 455 456 /* 457 * incoming and outgoing devices are the same 458 * send a redirect. 459 */ 460 461 rt = (struct rt6_info *) dst; 462 if (rt->rt6i_flags & RTF_GATEWAY) 463 target = &rt->rt6i_gateway; 464 else 465 target = &hdr->daddr; 466 467 peer = rt6_get_peer_create(rt); 468 469 /* Limit redirects both by destination (here) 470 and by source (inside ndisc_send_redirect) 471 */ 472 if (inet_peer_xrlim_allow(peer, 1*HZ)) 473 ndisc_send_redirect(skb, target); 474 } else { 475 int addrtype = ipv6_addr_type(&hdr->saddr); 476 477 /* This check is security critical. 
*/ 478 if (addrtype == IPV6_ADDR_ANY || 479 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) 480 goto error; 481 if (addrtype & IPV6_ADDR_LINKLOCAL) { 482 icmpv6_send(skb, ICMPV6_DEST_UNREACH, 483 ICMPV6_NOT_NEIGHBOUR, 0); 484 goto error; 485 } 486 } 487 488 mtu = dst_mtu(dst); 489 if (mtu < IPV6_MIN_MTU) 490 mtu = IPV6_MIN_MTU; 491 492 if (skb->len > mtu && !skb_is_gso(skb)) { 493 /* Again, force OUTPUT device used as source address */ 494 skb->dev = dst->dev; 495 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 496 IP6_INC_STATS_BH(net, 497 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); 498 IP6_INC_STATS_BH(net, 499 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); 500 kfree_skb(skb); 501 return -EMSGSIZE; 502 } 503 504 if (skb_cow(skb, dst->dev->hard_header_len)) { 505 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); 506 goto drop; 507 } 508 509 hdr = ipv6_hdr(skb); 510 511 /* Mangling hops number delayed to point after skb COW */ 512 513 hdr->hop_limit--; 514 515 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 516 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); 517 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, 518 ip6_forward_finish); 519 520error: 521 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); 522drop: 523 kfree_skb(skb); 524 return -EINVAL; 525} 526 527static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 528{ 529 to->pkt_type = from->pkt_type; 530 to->priority = from->priority; 531 to->protocol = from->protocol; 532 skb_dst_drop(to); 533 skb_dst_set(to, dst_clone(skb_dst(from))); 534 to->dev = from->dev; 535 to->mark = from->mark; 536 537#ifdef CONFIG_NET_SCHED 538 to->tc_index = from->tc_index; 539#endif 540 nf_copy(to, from); 541#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 542 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 543 to->nf_trace = from->nf_trace; 544#endif 545 skb_copy_secmark(to, from); 546} 547 548int 
ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) 549{ 550 u16 offset = sizeof(struct ipv6hdr); 551 struct ipv6_opt_hdr *exthdr = 552 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); 553 unsigned int packet_len = skb->tail - skb->network_header; 554 int found_rhdr = 0; 555 *nexthdr = &ipv6_hdr(skb)->nexthdr; 556 557 while (offset + 1 <= packet_len) { 558 559 switch (**nexthdr) { 560 561 case NEXTHDR_HOP: 562 break; 563 case NEXTHDR_ROUTING: 564 found_rhdr = 1; 565 break; 566 case NEXTHDR_DEST: 567#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 568 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) 569 break; 570#endif 571 if (found_rhdr) 572 return offset; 573 break; 574 default : 575 return offset; 576 } 577 578 offset += ipv6_optlen(exthdr); 579 *nexthdr = &exthdr->nexthdr; 580 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + 581 offset); 582 } 583 584 return offset; 585} 586 587void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) 588{ 589 static atomic_t ipv6_fragmentation_id; 590 int old, new; 591 592 if (rt && !(rt->dst.flags & DST_NOPEER)) { 593 struct inet_peer *peer = rt6_get_peer_create(rt); 594 595 if (peer) { 596 fhdr->identification = htonl(inet_getid(peer, 0)); 597 return; 598 } 599 } 600 do { 601 old = atomic_read(&ipv6_fragmentation_id); 602 new = old + 1; 603 if (!new) 604 new = 1; 605 } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); 606 fhdr->identification = htonl(new); 607} 608 609int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 610{ 611 struct sk_buff *frag; 612 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); 613 struct ipv6_pinfo *np = skb->sk ? 
inet6_sk(skb->sk) : NULL; 614 struct ipv6hdr *tmp_hdr; 615 struct frag_hdr *fh; 616 unsigned int mtu, hlen, left, len; 617 int hroom, troom; 618 __be32 frag_id = 0; 619 int ptr, offset = 0, err=0; 620 u8 *prevhdr, nexthdr = 0; 621 struct net *net = dev_net(skb_dst(skb)->dev); 622 623 hlen = ip6_find_1stfragopt(skb, &prevhdr); 624 nexthdr = *prevhdr; 625 626 mtu = ip6_skb_dst_mtu(skb); 627 628 /* We must not fragment if the socket is set to force MTU discovery 629 * or if the skb it not generated by a local socket. 630 */ 631 if (unlikely(!skb->local_df && skb->len > mtu)) { 632 if (skb->sk && dst_allfrag(skb_dst(skb))) 633 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); 634 635 skb->dev = skb_dst(skb)->dev; 636 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 637 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 638 IPSTATS_MIB_FRAGFAILS); 639 kfree_skb(skb); 640 return -EMSGSIZE; 641 } 642 643 if (np && np->frag_size < mtu) { 644 if (np->frag_size) 645 mtu = np->frag_size; 646 } 647 mtu -= hlen + sizeof(struct frag_hdr); 648 649 if (skb_has_frag_list(skb)) { 650 int first_len = skb_pagelen(skb); 651 struct sk_buff *frag2; 652 653 if (first_len - hlen > mtu || 654 ((first_len - hlen) & 7) || 655 skb_cloned(skb)) 656 goto slow_path; 657 658 skb_walk_frags(skb, frag) { 659 /* Correct geometry. */ 660 if (frag->len > mtu || 661 ((frag->len & 7) && frag->next) || 662 skb_headroom(frag) < hlen) 663 goto slow_path_clean; 664 665 /* Partially cloned skb? 
*/ 666 if (skb_shared(frag)) 667 goto slow_path_clean; 668 669 BUG_ON(frag->sk); 670 if (skb->sk) { 671 frag->sk = skb->sk; 672 frag->destructor = sock_wfree; 673 } 674 skb->truesize -= frag->truesize; 675 } 676 677 err = 0; 678 offset = 0; 679 frag = skb_shinfo(skb)->frag_list; 680 skb_frag_list_init(skb); 681 /* BUILD HEADER */ 682 683 *prevhdr = NEXTHDR_FRAGMENT; 684 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); 685 if (!tmp_hdr) { 686 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 687 IPSTATS_MIB_FRAGFAILS); 688 return -ENOMEM; 689 } 690 691 __skb_pull(skb, hlen); 692 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); 693 __skb_push(skb, hlen); 694 skb_reset_network_header(skb); 695 memcpy(skb_network_header(skb), tmp_hdr, hlen); 696 697 ipv6_select_ident(fh, rt); 698 fh->nexthdr = nexthdr; 699 fh->reserved = 0; 700 fh->frag_off = htons(IP6_MF); 701 frag_id = fh->identification; 702 703 first_len = skb_pagelen(skb); 704 skb->data_len = first_len - skb_headlen(skb); 705 skb->len = first_len; 706 ipv6_hdr(skb)->payload_len = htons(first_len - 707 sizeof(struct ipv6hdr)); 708 709 dst_hold(&rt->dst); 710 711 for (;;) { 712 /* Prepare header of the next frame, 713 * before previous one went down. 
*/ 714 if (frag) { 715 frag->ip_summed = CHECKSUM_NONE; 716 skb_reset_transport_header(frag); 717 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); 718 __skb_push(frag, hlen); 719 skb_reset_network_header(frag); 720 memcpy(skb_network_header(frag), tmp_hdr, 721 hlen); 722 offset += skb->len - hlen - sizeof(struct frag_hdr); 723 fh->nexthdr = nexthdr; 724 fh->reserved = 0; 725 fh->frag_off = htons(offset); 726 if (frag->next != NULL) 727 fh->frag_off |= htons(IP6_MF); 728 fh->identification = frag_id; 729 ipv6_hdr(frag)->payload_len = 730 htons(frag->len - 731 sizeof(struct ipv6hdr)); 732 ip6_copy_metadata(frag, skb); 733 } 734 735 err = output(skb); 736 if(!err) 737 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 738 IPSTATS_MIB_FRAGCREATES); 739 740 if (err || !frag) 741 break; 742 743 skb = frag; 744 frag = skb->next; 745 skb->next = NULL; 746 } 747 748 kfree(tmp_hdr); 749 750 if (err == 0) { 751 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 752 IPSTATS_MIB_FRAGOKS); 753 dst_release(&rt->dst); 754 return 0; 755 } 756 757 while (frag) { 758 skb = frag->next; 759 kfree_skb(frag); 760 frag = skb; 761 } 762 763 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 764 IPSTATS_MIB_FRAGFAILS); 765 dst_release(&rt->dst); 766 return err; 767 768slow_path_clean: 769 skb_walk_frags(skb, frag2) { 770 if (frag2 == frag) 771 break; 772 frag2->sk = NULL; 773 frag2->destructor = NULL; 774 skb->truesize += frag2->truesize; 775 } 776 } 777 778slow_path: 779 if ((skb->ip_summed == CHECKSUM_PARTIAL) && 780 skb_checksum_help(skb)) 781 goto fail; 782 783 left = skb->len - hlen; /* Space per frame */ 784 ptr = hlen; /* Where to start from */ 785 786 /* 787 * Fragment the datagram. 788 */ 789 790 *prevhdr = NEXTHDR_FRAGMENT; 791 hroom = LL_RESERVED_SPACE(rt->dst.dev); 792 troom = rt->dst.dev->needed_tailroom; 793 794 /* 795 * Keep copying data until we run out. 
796 */ 797 while(left > 0) { 798 len = left; 799 /* IF: it doesn't fit, use 'mtu' - the data space left */ 800 if (len > mtu) 801 len = mtu; 802 /* IF: we are not sending up to and including the packet end 803 then align the next start on an eight byte boundary */ 804 if (len < left) { 805 len &= ~7; 806 } 807 /* 808 * Allocate buffer. 809 */ 810 811 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + 812 hroom + troom, GFP_ATOMIC)) == NULL) { 813 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); 814 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 815 IPSTATS_MIB_FRAGFAILS); 816 err = -ENOMEM; 817 goto fail; 818 } 819 820 /* 821 * Set up data on packet 822 */ 823 824 ip6_copy_metadata(frag, skb); 825 skb_reserve(frag, hroom); 826 skb_put(frag, len + hlen + sizeof(struct frag_hdr)); 827 skb_reset_network_header(frag); 828 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); 829 frag->transport_header = (frag->network_header + hlen + 830 sizeof(struct frag_hdr)); 831 832 /* 833 * Charge the memory for the fragment to any owner 834 * it might possess 835 */ 836 if (skb->sk) 837 skb_set_owner_w(frag, skb->sk); 838 839 /* 840 * Copy the packet header into the new buffer. 841 */ 842 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); 843 844 /* 845 * Build fragment header. 846 */ 847 fh->nexthdr = nexthdr; 848 fh->reserved = 0; 849 if (!frag_id) { 850 ipv6_select_ident(fh, rt); 851 frag_id = fh->identification; 852 } else 853 fh->identification = frag_id; 854 855 /* 856 * Copy a block of the IP datagram. 857 */ 858 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len)) 859 BUG(); 860 left -= len; 861 862 fh->frag_off = htons(offset); 863 if (left > 0) 864 fh->frag_off |= htons(IP6_MF); 865 ipv6_hdr(frag)->payload_len = htons(frag->len - 866 sizeof(struct ipv6hdr)); 867 868 ptr += len; 869 offset += len; 870 871 /* 872 * Put this fragment into the sending queue. 
873 */ 874 err = output(frag); 875 if (err) 876 goto fail; 877 878 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 879 IPSTATS_MIB_FRAGCREATES); 880 } 881 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 882 IPSTATS_MIB_FRAGOKS); 883 consume_skb(skb); 884 return err; 885 886fail: 887 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 888 IPSTATS_MIB_FRAGFAILS); 889 kfree_skb(skb); 890 return err; 891} 892 893static inline int ip6_rt_check(const struct rt6key *rt_key, 894 const struct in6_addr *fl_addr, 895 const struct in6_addr *addr_cache) 896{ 897 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 898 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); 899} 900 901static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 902 struct dst_entry *dst, 903 const struct flowi6 *fl6) 904{ 905 struct ipv6_pinfo *np = inet6_sk(sk); 906 struct rt6_info *rt = (struct rt6_info *)dst; 907 908 if (!dst) 909 goto out; 910 911 /* Yes, checking route validity in not connected 912 * case is not very simple. Take into account, 913 * that we do not support routing by source, TOS, 914 * and MSG_DONTROUTE --ANK (980726) 915 * 916 * 1. ip6_rt_check(): If route was host route, 917 * check that cached destination is current. 918 * If it is network route, we still may 919 * check its validity using saved pointer 920 * to the last used address: daddr_cache. 921 * We do not want to save whole address now, 922 * (because main consumer of this service 923 * is tcp, which has not this problem), 924 * so that the last trick works only on connected 925 * sockets. 926 * 2. oif also should be the same. 
927 */ 928 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || 929#ifdef CONFIG_IPV6_SUBTREES 930 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || 931#endif 932 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { 933 dst_release(dst); 934 dst = NULL; 935 } 936 937out: 938 return dst; 939} 940 941static int ip6_dst_lookup_tail(struct sock *sk, 942 struct dst_entry **dst, struct flowi6 *fl6) 943{ 944 struct net *net = sock_net(sk); 945#ifdef CONFIG_IPV6_OPTIMISTIC_DAD 946 struct neighbour *n; 947#endif 948 int err; 949 950 if (*dst == NULL) 951 *dst = ip6_route_output(net, sk, fl6); 952 953 if ((err = (*dst)->error)) 954 goto out_err_release; 955 956 if (ipv6_addr_any(&fl6->saddr)) { 957 struct rt6_info *rt = (struct rt6_info *) *dst; 958 err = ip6_route_get_saddr(net, rt, &fl6->daddr, 959 sk ? inet6_sk(sk)->srcprefs : 0, 960 &fl6->saddr); 961 if (err) 962 goto out_err_release; 963 } 964 965#ifdef CONFIG_IPV6_OPTIMISTIC_DAD 966 /* 967 * Here if the dst entry we've looked up 968 * has a neighbour entry that is in the INCOMPLETE 969 * state and the src address from the flow is 970 * marked as OPTIMISTIC, we release the found 971 * dst entry and replace it instead with the 972 * dst entry of the nexthop router 973 */ 974 rcu_read_lock(); 975 n = dst_get_neighbour_noref(*dst); 976 if (n && !(n->nud_state & NUD_VALID)) { 977 struct inet6_ifaddr *ifp; 978 struct flowi6 fl_gw6; 979 int redirect; 980 981 rcu_read_unlock(); 982 ifp = ipv6_get_ifaddr(net, &fl6->saddr, 983 (*dst)->dev, 1); 984 985 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 986 if (ifp) 987 in6_ifa_put(ifp); 988 989 if (redirect) { 990 /* 991 * We need to get the dst entry for the 992 * default router instead 993 */ 994 dst_release(*dst); 995 memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); 996 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); 997 *dst = ip6_route_output(net, sk, &fl_gw6); 998 if ((err = (*dst)->error)) 999 goto out_err_release; 1000 } 1001 } else { 1002 
rcu_read_unlock(); 1003 } 1004#endif 1005 1006 return 0; 1007 1008out_err_release: 1009 if (err == -ENETUNREACH) 1010 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); 1011 dst_release(*dst); 1012 *dst = NULL; 1013 return err; 1014} 1015 1016/** 1017 * ip6_dst_lookup - perform route lookup on flow 1018 * @sk: socket which provides route info 1019 * @dst: pointer to dst_entry * for result 1020 * @fl6: flow to lookup 1021 * 1022 * This function performs a route lookup on the given flow. 1023 * 1024 * It returns zero on success, or a standard errno code on error. 1025 */ 1026int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) 1027{ 1028 *dst = NULL; 1029 return ip6_dst_lookup_tail(sk, dst, fl6); 1030} 1031EXPORT_SYMBOL_GPL(ip6_dst_lookup); 1032 1033/** 1034 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec 1035 * @sk: socket which provides route info 1036 * @fl6: flow to lookup 1037 * @final_dst: final destination address for ipsec lookup 1038 * @can_sleep: we are in a sleepable context 1039 * 1040 * This function performs a route lookup on the given flow. 1041 * 1042 * It returns a valid dst pointer on success, or a pointer encoded 1043 * error code. 
1044 */ 1045struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 1046 const struct in6_addr *final_dst, 1047 bool can_sleep) 1048{ 1049 struct dst_entry *dst = NULL; 1050 int err; 1051 1052 err = ip6_dst_lookup_tail(sk, &dst, fl6); 1053 if (err) 1054 return ERR_PTR(err); 1055 if (final_dst) 1056 fl6->daddr = *final_dst; 1057 if (can_sleep) 1058 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; 1059 1060 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); 1061} 1062EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); 1063 1064/** 1065 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow 1066 * @sk: socket which provides the dst cache and route info 1067 * @fl6: flow to lookup 1068 * @final_dst: final destination address for ipsec lookup 1069 * @can_sleep: we are in a sleepable context 1070 * 1071 * This function performs a route lookup on the given flow with the 1072 * possibility of using the cached route in the socket if it is valid. 1073 * It will take the socket dst lock when operating on the dst cache. 1074 * As a result, this function can only be used in process context. 1075 * 1076 * It returns a valid dst pointer on success, or a pointer encoded 1077 * error code. 
1078 */ 1079struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 1080 const struct in6_addr *final_dst, 1081 bool can_sleep) 1082{ 1083 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 1084 int err; 1085 1086 dst = ip6_sk_dst_check(sk, dst, fl6); 1087 1088 err = ip6_dst_lookup_tail(sk, &dst, fl6); 1089 if (err) 1090 return ERR_PTR(err); 1091 if (final_dst) 1092 fl6->daddr = *final_dst; 1093 if (can_sleep) 1094 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; 1095 1096 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); 1097} 1098EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); 1099 1100static inline int ip6_ufo_append_data(struct sock *sk, 1101 int getfrag(void *from, char *to, int offset, int len, 1102 int odd, struct sk_buff *skb), 1103 void *from, int length, int hh_len, int fragheaderlen, 1104 int transhdrlen, int mtu,unsigned int flags, 1105 struct rt6_info *rt) 1106 1107{ 1108 struct sk_buff *skb; 1109 int err; 1110 1111 /* There is support for UDP large send offload by network 1112 * device, so create one single skb packet containing complete 1113 * udp datagram 1114 */ 1115 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { 1116 skb = sock_alloc_send_skb(sk, 1117 hh_len + fragheaderlen + transhdrlen + 20, 1118 (flags & MSG_DONTWAIT), &err); 1119 if (skb == NULL) 1120 return err; 1121 1122 /* reserve space for Hardware header */ 1123 skb_reserve(skb, hh_len); 1124 1125 /* create space for UDP/IP header */ 1126 skb_put(skb,fragheaderlen + transhdrlen); 1127 1128 /* initialize network header pointer */ 1129 skb_reset_network_header(skb); 1130 1131 /* initialize protocol header pointer */ 1132 skb->transport_header = skb->network_header + fragheaderlen; 1133 1134 skb->ip_summed = CHECKSUM_PARTIAL; 1135 skb->csum = 0; 1136 } 1137 1138 err = skb_append_datato_frags(sk,skb, getfrag, from, 1139 (length - transhdrlen)); 1140 if (!err) { 1141 struct frag_hdr fhdr; 1142 1143 /* Specify the length of each IPv6 
datagram fragment. 1144 * It has to be a multiple of 8. 1145 */ 1146 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - 1147 sizeof(struct frag_hdr)) & ~7; 1148 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 1149 ipv6_select_ident(&fhdr, rt); 1150 skb_shinfo(skb)->ip6_frag_id = fhdr.identification; 1151 __skb_queue_tail(&sk->sk_write_queue, skb); 1152 1153 return 0; 1154 } 1155 /* There is not enough support do UPD LSO, 1156 * so follow normal path 1157 */ 1158 kfree_skb(skb); 1159 1160 return err; 1161} 1162 1163static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 1164 gfp_t gfp) 1165{ 1166 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1167} 1168 1169static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, 1170 gfp_t gfp) 1171{ 1172 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1173} 1174 1175static void ip6_append_data_mtu(int *mtu, 1176 int *maxfraglen, 1177 unsigned int fragheaderlen, 1178 struct sk_buff *skb, 1179 struct rt6_info *rt) 1180{ 1181 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { 1182 if (skb == NULL) { 1183 /* first fragment, reserve header_len */ 1184 *mtu = *mtu - rt->dst.header_len; 1185 1186 } else { 1187 /* 1188 * this fragment is not first, the headers 1189 * space is regarded as data space. 
1190 */ 1191 *mtu = dst_mtu(rt->dst.path); 1192 } 1193 *maxfraglen = ((*mtu - fragheaderlen) & ~7) 1194 + fragheaderlen - sizeof(struct frag_hdr); 1195 } 1196} 1197 1198int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, 1199 int offset, int len, int odd, struct sk_buff *skb), 1200 void *from, int length, int transhdrlen, 1201 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, 1202 struct rt6_info *rt, unsigned int flags, int dontfrag) 1203{ 1204 struct inet_sock *inet = inet_sk(sk); 1205 struct ipv6_pinfo *np = inet6_sk(sk); 1206 struct inet_cork *cork; 1207 struct sk_buff *skb, *skb_prev = NULL; 1208 unsigned int maxfraglen, fragheaderlen; 1209 int exthdrlen; 1210 int dst_exthdrlen; 1211 int hh_len; 1212 int mtu; 1213 int copy; 1214 int err; 1215 int offset = 0; 1216 __u8 tx_flags = 0; 1217 1218 if (flags&MSG_PROBE) 1219 return 0; 1220 cork = &inet->cork.base; 1221 if (skb_queue_empty(&sk->sk_write_queue)) { 1222 /* 1223 * setup for corking 1224 */ 1225 if (opt) { 1226 if (WARN_ON(np->cork.opt)) 1227 return -EINVAL; 1228 1229 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation); 1230 if (unlikely(np->cork.opt == NULL)) 1231 return -ENOBUFS; 1232 1233 np->cork.opt->tot_len = opt->tot_len; 1234 np->cork.opt->opt_flen = opt->opt_flen; 1235 np->cork.opt->opt_nflen = opt->opt_nflen; 1236 1237 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, 1238 sk->sk_allocation); 1239 if (opt->dst0opt && !np->cork.opt->dst0opt) 1240 return -ENOBUFS; 1241 1242 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, 1243 sk->sk_allocation); 1244 if (opt->dst1opt && !np->cork.opt->dst1opt) 1245 return -ENOBUFS; 1246 1247 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, 1248 sk->sk_allocation); 1249 if (opt->hopopt && !np->cork.opt->hopopt) 1250 return -ENOBUFS; 1251 1252 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, 1253 sk->sk_allocation); 1254 if (opt->srcrt && !np->cork.opt->srcrt) 1255 return -ENOBUFS; 1256 1257 /* need source address above 
miyazawa*/ 1258 } 1259 dst_hold(&rt->dst); 1260 cork->dst = &rt->dst; 1261 inet->cork.fl.u.ip6 = *fl6; 1262 np->cork.hop_limit = hlimit; 1263 np->cork.tclass = tclass; 1264 if (rt->dst.flags & DST_XFRM_TUNNEL) 1265 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? 1266 rt->dst.dev->mtu : dst_mtu(&rt->dst); 1267 else 1268 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? 1269 rt->dst.dev->mtu : dst_mtu(rt->dst.path); 1270 if (np->frag_size < mtu) { 1271 if (np->frag_size) 1272 mtu = np->frag_size; 1273 } 1274 cork->fragsize = mtu; 1275 if (dst_allfrag(rt->dst.path)) 1276 cork->flags |= IPCORK_ALLFRAG; 1277 cork->length = 0; 1278 sk->sk_sndmsg_page = NULL; 1279 sk->sk_sndmsg_off = 0; 1280 exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; 1281 length += exthdrlen; 1282 transhdrlen += exthdrlen; 1283 dst_exthdrlen = rt->dst.header_len; 1284 } else { 1285 rt = (struct rt6_info *)cork->dst; 1286 fl6 = &inet->cork.fl.u.ip6; 1287 opt = np->cork.opt; 1288 transhdrlen = 0; 1289 exthdrlen = 0; 1290 dst_exthdrlen = 0; 1291 mtu = cork->fragsize; 1292 } 1293 1294 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 1295 1296 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + 1297 (opt ? opt->opt_nflen : 0); 1298 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); 1299 1300 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { 1301 if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { 1302 ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); 1303 return -EMSGSIZE; 1304 } 1305 } 1306 1307 /* For UDP, check if TX timestamp is enabled */ 1308 if (sk->sk_type == SOCK_DGRAM) { 1309 err = sock_tx_timestamp(sk, &tx_flags); 1310 if (err) 1311 goto error; 1312 } 1313 1314 /* 1315 * Let's try using as much space as possible. 1316 * Use MTU if total length of the message fits into the MTU. 1317 * Otherwise, we need to reserve fragment header and 1318 * fragment alignment (= 8-15 octects, in total). 
1319 * 1320 * Note that we may need to "move" the data from the tail of 1321 * of the buffer to the new fragment when we split 1322 * the message. 1323 * 1324 * FIXME: It may be fragmented into multiple chunks 1325 * at once if non-fragmentable extension headers 1326 * are too large. 1327 * --yoshfuji 1328 */ 1329 1330 cork->length += length; 1331 if (length > mtu) { 1332 int proto = sk->sk_protocol; 1333 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ 1334 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); 1335 return -EMSGSIZE; 1336 } 1337 1338 if (proto == IPPROTO_UDP && 1339 (rt->dst.dev->features & NETIF_F_UFO)) { 1340 1341 err = ip6_ufo_append_data(sk, getfrag, from, length, 1342 hh_len, fragheaderlen, 1343 transhdrlen, mtu, flags, rt); 1344 if (err) 1345 goto error; 1346 return 0; 1347 } 1348 } 1349 1350 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 1351 goto alloc_new_skb; 1352 1353 while (length > 0) { 1354 /* Check if the remaining data fits into current packet. */ 1355 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; 1356 if (copy < length) 1357 copy = maxfraglen - skb->len; 1358 1359 if (copy <= 0) { 1360 char *data; 1361 unsigned int datalen; 1362 unsigned int fraglen; 1363 unsigned int fraggap; 1364 unsigned int alloclen; 1365alloc_new_skb: 1366 /* There's no room in the current skb */ 1367 if (skb) 1368 fraggap = skb->len - maxfraglen; 1369 else 1370 fraggap = 0; 1371 /* update mtu and maxfraglen if necessary */ 1372 if (skb == NULL || skb_prev == NULL) 1373 ip6_append_data_mtu(&mtu, &maxfraglen, 1374 fragheaderlen, skb, rt); 1375 1376 skb_prev = skb; 1377 1378 /* 1379 * If remaining data exceeds the mtu, 1380 * we know we need more fragment(s). 1381 */ 1382 datalen = length + fraggap; 1383 1384 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? 
mtu : maxfraglen) - fragheaderlen) 1385 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; 1386 if ((flags & MSG_MORE) && 1387 !(rt->dst.dev->features&NETIF_F_SG)) 1388 alloclen = mtu; 1389 else 1390 alloclen = datalen + fragheaderlen; 1391 1392 alloclen += dst_exthdrlen; 1393 1394 if (datalen != length + fraggap) { 1395 /* 1396 * this is not the last fragment, the trailer 1397 * space is regarded as data space. 1398 */ 1399 datalen += rt->dst.trailer_len; 1400 } 1401 1402 alloclen += rt->dst.trailer_len; 1403 fraglen = datalen + fragheaderlen; 1404 1405 /* 1406 * We just reserve space for fragment header. 1407 * Note: this may be overallocation if the message 1408 * (without MSG_MORE) fits into the MTU. 1409 */ 1410 alloclen += sizeof(struct frag_hdr); 1411 1412 if (transhdrlen) { 1413 skb = sock_alloc_send_skb(sk, 1414 alloclen + hh_len, 1415 (flags & MSG_DONTWAIT), &err); 1416 } else { 1417 skb = NULL; 1418 if (atomic_read(&sk->sk_wmem_alloc) <= 1419 2 * sk->sk_sndbuf) 1420 skb = sock_wmalloc(sk, 1421 alloclen + hh_len, 1, 1422 sk->sk_allocation); 1423 if (unlikely(skb == NULL)) 1424 err = -ENOBUFS; 1425 else { 1426 /* Only the initial fragment 1427 * is time stamped. 
1428 */ 1429 tx_flags = 0; 1430 } 1431 } 1432 if (skb == NULL) 1433 goto error; 1434 /* 1435 * Fill in the control structures 1436 */ 1437 skb->ip_summed = CHECKSUM_NONE; 1438 skb->csum = 0; 1439 /* reserve for fragmentation and ipsec header */ 1440 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + 1441 dst_exthdrlen); 1442 1443 if (sk->sk_type == SOCK_DGRAM) 1444 skb_shinfo(skb)->tx_flags = tx_flags; 1445 1446 /* 1447 * Find where to start putting bytes 1448 */ 1449 data = skb_put(skb, fraglen); 1450 skb_set_network_header(skb, exthdrlen); 1451 data += fragheaderlen; 1452 skb->transport_header = (skb->network_header + 1453 fragheaderlen); 1454 if (fraggap) { 1455 skb->csum = skb_copy_and_csum_bits( 1456 skb_prev, maxfraglen, 1457 data + transhdrlen, fraggap, 0); 1458 skb_prev->csum = csum_sub(skb_prev->csum, 1459 skb->csum); 1460 data += fraggap; 1461 pskb_trim_unique(skb_prev, maxfraglen); 1462 } 1463 copy = datalen - transhdrlen - fraggap; 1464 1465 if (copy < 0) { 1466 err = -EINVAL; 1467 kfree_skb(skb); 1468 goto error; 1469 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { 1470 err = -EFAULT; 1471 kfree_skb(skb); 1472 goto error; 1473 } 1474 1475 offset += copy; 1476 length -= datalen - fraggap; 1477 transhdrlen = 0; 1478 exthdrlen = 0; 1479 dst_exthdrlen = 0; 1480 1481 /* 1482 * Put the packet on the pending queue 1483 */ 1484 __skb_queue_tail(&sk->sk_write_queue, skb); 1485 continue; 1486 } 1487 1488 if (copy > length) 1489 copy = length; 1490 1491 if (!(rt->dst.dev->features&NETIF_F_SG)) { 1492 unsigned int off; 1493 1494 off = skb->len; 1495 if (getfrag(from, skb_put(skb, copy), 1496 offset, copy, off, skb) < 0) { 1497 __skb_trim(skb, off); 1498 err = -EFAULT; 1499 goto error; 1500 } 1501 } else { 1502 int i = skb_shinfo(skb)->nr_frags; 1503 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; 1504 struct page *page = sk->sk_sndmsg_page; 1505 int off = sk->sk_sndmsg_off; 1506 unsigned int left; 1507 1508 if (page && 
(left = PAGE_SIZE - off) > 0) { 1509 if (copy >= left) 1510 copy = left; 1511 if (page != skb_frag_page(frag)) { 1512 if (i == MAX_SKB_FRAGS) { 1513 err = -EMSGSIZE; 1514 goto error; 1515 } 1516 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); 1517 skb_frag_ref(skb, i); 1518 frag = &skb_shinfo(skb)->frags[i]; 1519 } 1520 } else if(i < MAX_SKB_FRAGS) { 1521 if (copy > PAGE_SIZE) 1522 copy = PAGE_SIZE; 1523 page = alloc_pages(sk->sk_allocation, 0); 1524 if (page == NULL) { 1525 err = -ENOMEM; 1526 goto error; 1527 } 1528 sk->sk_sndmsg_page = page; 1529 sk->sk_sndmsg_off = 0; 1530 1531 skb_fill_page_desc(skb, i, page, 0, 0); 1532 frag = &skb_shinfo(skb)->frags[i]; 1533 } else { 1534 err = -EMSGSIZE; 1535 goto error; 1536 } 1537 if (getfrag(from, 1538 skb_frag_address(frag) + skb_frag_size(frag), 1539 offset, copy, skb->len, skb) < 0) { 1540 err = -EFAULT; 1541 goto error; 1542 } 1543 sk->sk_sndmsg_off += copy; 1544 skb_frag_size_add(frag, copy); 1545 skb->len += copy; 1546 skb->data_len += copy; 1547 skb->truesize += copy; 1548 atomic_add(copy, &sk->sk_wmem_alloc); 1549 } 1550 offset += copy; 1551 length -= copy; 1552 } 1553 return 0; 1554error: 1555 cork->length -= length; 1556 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1557 return err; 1558} 1559EXPORT_SYMBOL_GPL(ip6_append_data); 1560 1561static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) 1562{ 1563 if (np->cork.opt) { 1564 kfree(np->cork.opt->dst0opt); 1565 kfree(np->cork.opt->dst1opt); 1566 kfree(np->cork.opt->hopopt); 1567 kfree(np->cork.opt->srcrt); 1568 kfree(np->cork.opt); 1569 np->cork.opt = NULL; 1570 } 1571 1572 if (inet->cork.base.dst) { 1573 dst_release(inet->cork.base.dst); 1574 inet->cork.base.dst = NULL; 1575 inet->cork.base.flags &= ~IPCORK_ALLFRAG; 1576 } 1577 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); 1578} 1579 1580int ip6_push_pending_frames(struct sock *sk) 1581{ 1582 struct sk_buff *skb, *tmp_skb; 1583 struct sk_buff **tail_skb; 
1584 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; 1585 struct inet_sock *inet = inet_sk(sk); 1586 struct ipv6_pinfo *np = inet6_sk(sk); 1587 struct net *net = sock_net(sk); 1588 struct ipv6hdr *hdr; 1589 struct ipv6_txoptions *opt = np->cork.opt; 1590 struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst; 1591 struct flowi6 *fl6 = &inet->cork.fl.u.ip6; 1592 unsigned char proto = fl6->flowi6_proto; 1593 int err = 0; 1594 1595 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) 1596 goto out; 1597 tail_skb = &(skb_shinfo(skb)->frag_list); 1598 1599 /* move skb->data to ip header from ext header */ 1600 if (skb->data < skb_network_header(skb)) 1601 __skb_pull(skb, skb_network_offset(skb)); 1602 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { 1603 __skb_pull(tmp_skb, skb_network_header_len(skb)); 1604 *tail_skb = tmp_skb; 1605 tail_skb = &(tmp_skb->next); 1606 skb->len += tmp_skb->len; 1607 skb->data_len += tmp_skb->len; 1608 skb->truesize += tmp_skb->truesize; 1609 tmp_skb->destructor = NULL; 1610 tmp_skb->sk = NULL; 1611 } 1612 1613 /* Allow local fragmentation. 
*/ 1614 if (np->pmtudisc < IPV6_PMTUDISC_DO) 1615 skb->local_df = 1; 1616 1617 *final_dst = fl6->daddr; 1618 __skb_pull(skb, skb_network_header_len(skb)); 1619 if (opt && opt->opt_flen) 1620 ipv6_push_frag_opts(skb, opt, &proto); 1621 if (opt && opt->opt_nflen) 1622 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); 1623 1624 skb_push(skb, sizeof(struct ipv6hdr)); 1625 skb_reset_network_header(skb); 1626 hdr = ipv6_hdr(skb); 1627 1628 *(__be32*)hdr = fl6->flowlabel | 1629 htonl(0x60000000 | ((int)np->cork.tclass << 20)); 1630 1631 hdr->hop_limit = np->cork.hop_limit; 1632 hdr->nexthdr = proto; 1633 hdr->saddr = fl6->saddr; 1634 hdr->daddr = *final_dst; 1635 1636 skb->priority = sk->sk_priority; 1637 skb->mark = sk->sk_mark; 1638 1639 skb_dst_set(skb, dst_clone(&rt->dst)); 1640 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 1641 if (proto == IPPROTO_ICMPV6) { 1642 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 1643 1644 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); 1645 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); 1646 } 1647 1648 err = ip6_local_out(skb); 1649 if (err) { 1650 if (err > 0) 1651 err = net_xmit_errno(err); 1652 if (err) 1653 goto error; 1654 } 1655 1656out: 1657 ip6_cork_release(inet, np); 1658 return err; 1659error: 1660 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1661 goto out; 1662} 1663EXPORT_SYMBOL_GPL(ip6_push_pending_frames); 1664 1665void ip6_flush_pending_frames(struct sock *sk) 1666{ 1667 struct sk_buff *skb; 1668 1669 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { 1670 if (skb_dst(skb)) 1671 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), 1672 IPSTATS_MIB_OUTDISCARDS); 1673 kfree_skb(skb); 1674 } 1675 1676 ip6_cork_release(inet_sk(sk), inet6_sk(sk)); 1677} 1678EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); 1679