ip6_output.c revision 69ead7afdf6028184f713a77376ee26f8aaafdcd
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
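
/*
 * Note on __ip6_local_out(): payload_len is a 16-bit field, so lengths
 * above IPV6_MAXPLEN (65535) cannot be represented and are written as 0.
 * A zero payload length is also what RFC 2675 jumbograms carry on the
 * wire, with the real length living in a hop-by-hop option; oversized
 * (e.g. GSO) skbs get their per-segment lengths fixed up later.
 */
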
/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));

	netif_rx_ni(newskb);
	return 0;
}

static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);
	}

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

static int ip6_finish_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_finish_output2);
	else
		return ip6_finish_output2(skb);
}
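
/*
 * Note: dst_allfrag() reports a path whose discovered MTU fell below
 * IPV6_MIN_MTU (1280 bytes).  RFC 2460 does not require a node to send
 * packets smaller than the minimum MTU; instead it must include a
 * fragment header in every packet on such a path, which is why
 * ip6_finish_output() forces ip6_fragment() here even for packets that
 * would otherwise fit.
 */
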
int ip6_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl->proto;
	int seg_len = skb->len;
	int hlimit = -1;
	int tclass = 0;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np) {
		tclass = np->tclass;
		hlimit = np->hop_limit;
	}
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
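
/*
 * The first 32-bit word written by ip6_xmit() packs version, traffic
 * class and flow label (RFC 2460, section 3):
 *
 *	 31      28 27       20 19                  0
 *	+----------+-----------+---------------------+
 *	| ver = 6  |  tclass   |      flow label     |
 *	+----------+-----------+---------------------+
 *
 * e.g. tclass 0 with a zero flow label yields htonl(0x60000000), a bare
 * "version 6" word.  fl->fl6_flowlabel is already a network-byte-order
 * word with the label in its low 20 bits, so it is OR-ed in after the
 * htonl().
 */
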
/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is performance critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
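
/*
 * ip6_call_ra_chain() implements the IPV6_ROUTER_ALERT delivery model:
 * every raw socket registered for Router Alert value 'sel' gets its own
 * clone of the packet, and the last matching socket consumes the
 * original skb (hence the "deliver to previous, remember current"
 * pattern in the loop).  A nonzero return tells ip6_forward() that the
 * packet was stolen and must not be forwarded further.
 */
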
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We DO NOT do any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that the application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not an end node, so if the packet contains
	 *	AH/ESP, we cannot do anything.
	 *	Defragmentation would also be a mistake; RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr *)&n->primary_key;
		else
			target = &hdr->daddr;

		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}
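
	/*
	 * Why the source-address check above is security critical:
	 * RFC 4291 forbids the unspecified (::), loopback (::1) and
	 * multicast addresses as the source of a forwarded packet, and
	 * a router that forwarded them anyway could be used to bounce
	 * traffic past ingress filters.  Link-local sources are legal
	 * on the wire but meaningless beyond one link, so those get an
	 * ICMPV6_NOT_NEIGHBOUR error instead of being forwarded.
	 */
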
	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (skb->len > mtu && !skb_is_gso(skb)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (!skb->local_df && skb->len > mtu) {
		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);
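
	/*
	 * Fragment-size arithmetic, e.g. on a 1500-byte MTU with no
	 * extension headers (hlen = 40, the bare IPv6 header):
	 *
	 *	mtu = 1500 - (40 + 8) = 1452 bytes of payload per fragment
	 *
	 * and the slow path below additionally rounds non-final
	 * fragments down to a multiple of 8 (1448 here), since the
	 * fragment offset field counts in 8-octet units.
	 */
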
	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;
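
		/*
		 * Wire format of frag_off: the upper 13 bits carry the
		 * fragment offset in 8-octet units, the low bits are
		 * reserved/flags (IP6_MF = 0x0001).  Every offset used
		 * here is a byte count that is a multiple of 8, so
		 * htons(offset) already has zeros in the low three bits
		 * and byte offsets can be stored directly without
		 * shifting.
		 */
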
		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down.
			 */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag,
						sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->dst);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_ALLOCATED_SPACE(rt->dst.dev),
				      GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
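
/*
 * ip6_rt_check() below returns nonzero when the cached route can NOT be
 * confirmed for this flow: neither an exactly matching host route
 * (plen == 128 with a matching key) nor the saved last-used address
 * (addr_cache) vouches for fl_addr.  A zero result means "keep using
 * the cached dst"; otherwise the caller releases it and re-looks up.
 */
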
static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the unconnected
	 * case is not very simple. Take into account that
	 * we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi *fl,
				      const struct in6_addr *final_dst,
				      bool can_sleep)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sk, &dst, fl);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		ipv6_addr_copy(&fl->fl6_dst, final_dst);
	if (can_sleep) {
		fl->flags |= FLOWI_FLAG_CAN_SLEEP;
		err = __xfrm_lookup(sock_net(sk), &dst, fl, sk, 0);
		if (err == -EREMOTE)
			return ip6_dst_blackhole(sock_net(sk), dst);
		if (err)
			return ERR_PTR(err);
	} else {
		err = xfrm_lookup(sock_net(sk), &dst, fl, sk, 0);
		if (err)
			return ERR_PTR(err);
	}
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi *fl,
					 const struct in6_addr *final_dst,
					 bool can_sleep)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	dst = ip6_sk_dst_check(sk, dst, fl);

	err = ip6_dst_lookup_tail(sk, &dst, fl);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		ipv6_addr_copy(&fl->fl6_dst, final_dst);
	if (can_sleep) {
		fl->flags |= FLOWI_FLAG_CAN_SLEEP;
		err = __xfrm_lookup(sock_net(sk), &dst, fl, sk, 0);
		if (err == -EREMOTE)
			return ip6_dst_blackhole(sock_net(sk), dst);
		if (err)
			return ERR_PTR(err);
	} else {
		err = xfrm_lookup(sock_net(sk), &dst, fl, sk, 0);
		if (err)
			return ERR_PTR(err);
	}
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
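
/*
 * Both *_lookup_flow() helpers return an ERR_PTR() on failure rather
 * than NULL, so a caller is expected to test the result with IS_ERR(),
 * roughly:
 *
 *	dst = ip6_dst_lookup_flow(sk, &fl, NULL, false);
 *	if (IS_ERR(dst))
 *		return PTR_ERR(dst);
 */
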
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
				    int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(&fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow the normal path
	 */
	kfree_skb(skb);

	return err;
}
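
/*
 * gso_size example: with mtu = 1500 and fragheaderlen = 40 (bare IPv6
 * header), each UFO segment carries (1500 - 40 - 8) & ~7 = 1448 bytes
 * of payload, so the fragment offsets the device generates stay on the
 * required 8-octet boundaries.
 */
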
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;
	__u8 tx_flags = 0;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->dst);
		inet->cork.dst = &rt->dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
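
	/*
	 * maxfraglen example: mtu = 1500, fragheaderlen = 40 gives
	 * ((1500 - 40) & ~7) + 40 - 8 = 1488, i.e. each non-final
	 * fragment skb holds 1488 bytes of header + payload.  That
	 * leaves 8 bytes of the MTU for the fragment header inserted
	 * later by ip6_fragment() and keeps the fragment payload
	 * (1488 - 40 = 1448) a multiple of 8.
	 */
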
	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM) {
		err = sock_tx_timestamp(sk, &tx_flags);
		if (err)
			goto error;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}

		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;
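
			/*
			 * fraggap is the tail of the previous skb that
			 * overshoots the fragment boundary: the previous
			 * fragment was allowed to fill a whole MTU, but
			 * once another fragment follows it, its payload
			 * must end on an 8-octet boundary (maxfraglen).
			 * The excess bytes are copied into the head of
			 * the new skb below and trimmed off skb_prev.
			 */
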
			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
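
/*
 * Typical corked-send flow (UDP, raw, ICMPv6): ip6_append_data() is
 * called one or more times to queue data on sk->sk_write_queue, then
 * ip6_push_pending_frames() below glues the queued skbs together,
 * prepends the IPv6 header and hands the result to ip6_local_out().
 * ip6_flush_pending_frames() is the error-path counterpart that simply
 * drops whatever was queued.
 */
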
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.dst) {
		dst_release(inet->cork.dst);
		inet->cork.dst = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}