af_packet.c revision 2d37a186cedc51502dbee71c16ae0fbd9114d62c
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * PACKET - implements raw packet sockets.
7 *
8 * Authors: Ross Biro
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 *
12 * Fixes:
13 * Alan Cox : verify_area() now used correctly
14 * Alan Cox : new skbuff lists, look ma no backlogs!
15 * Alan Cox : tidied skbuff lists.
16 * Alan Cox : Now uses generic datagram routines I
17 * added. Also fixed the peek/read crash
18 * from all old Linux datagram code.
19 * Alan Cox : Uses the improved datagram code.
20 * Alan Cox : Added NULL's for socket options.
21 * Alan Cox : Re-commented the code.
22 * Alan Cox : Use new kernel side addressing
23 * Rob Janssen : Correct MTU usage.
24 * Dave Platt : Counter leaks caused by incorrect
25 * interrupt locking and some slightly
26 * dubious gcc output. Can you read
27 * compiler: it said _VOLATILE_
28 * Richard Kooijman : Timestamp fixes.
29 * Alan Cox : New buffers. Use sk->mac.raw.
30 * Alan Cox : sendmsg/recvmsg support.
31 * Alan Cox : Protocol setting support
32 * Alexey Kuznetsov : Untied from IPv4 stack.
33 * Cyrus Durgin : Fixed kerneld for kmod.
34 * Michal Ostrowski : Module initialization cleanup.
35 * Ulises Alonso : Frame number limit removal and
36 * packet_set_ring memory leak.
37 * Eric Biederman : Allow for > 8 byte hardware addresses.
38 * The convention is that longer addresses
39 * will simply extend the hardware address
40 * byte arrays at the end of sockaddr_ll
41 * and packet_mreq.
42 * Johann Baudy : Added TX RING.
43 *
44 * This program is free software; you can redistribute it and/or
45 * modify it under the terms of the GNU General Public License
46 * as published by the Free Software Foundation; either version
47 * 2 of the License, or (at your option) any later version.
48 *
49 */
50
51#include <linux/types.h>
52#include <linux/mm.h>
53#include <linux/capability.h>
54#include <linux/fcntl.h>
55#include <linux/socket.h>
56#include <linux/in.h>
57#include <linux/inet.h>
58#include <linux/netdevice.h>
59#include <linux/if_packet.h>
60#include <linux/wireless.h>
61#include <linux/kernel.h>
62#include <linux/kmod.h>
63#include <net/net_namespace.h>
64#include <net/ip.h>
65#include <net/protocol.h>
66#include <linux/skbuff.h>
67#include <net/sock.h>
68#include <linux/errno.h>
69#include <linux/timer.h>
70#include <asm/system.h>
71#include <asm/uaccess.h>
72#include <asm/ioctls.h>
73#include <asm/page.h>
74#include <asm/cacheflush.h>
75#include <asm/io.h>
76#include <linux/proc_fs.h>
77#include <linux/seq_file.h>
78#include <linux/poll.h>
79#include <linux/module.h>
80#include <linux/init.h>
81#include <linux/mutex.h>
82
83#ifdef CONFIG_INET
84#include <net/inet_common.h>
85#endif
86
87/*
88 Assumptions:
89 - if a device has no dev->hard_header routine, it adds and removes the ll header
90 itself. In this case the ll header is invisible outside the device,
91 but higher levels should still reserve dev->hard_header_len.
92 Some devices are clever enough to reallocate the skb when the header
93 does not fit into the reserved space (tunnels); others are silly
94 (PPP).
95 - a packet socket receives packets with the ll header already pulled,
96 so SOCK_RAW should push it back.
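 For example: an Ethernet device exports a 14-byte (ETH_HLEN) ll header,
 so a SOCK_RAW sender must itself place those 14 bytes at the front of
 each frame it writes, while a PPP device adds its header internally and
 the socket never sees it.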
97
98On receive:
99-----------
100
101Incoming, dev->hard_header!=NULL
102 mac_header -> ll header
103 data -> data
104
105Outgoing, dev->hard_header!=NULL
106 mac_header -> ll header
107 data -> ll header
108
109Incoming, dev->hard_header==NULL
110 mac_header -> UNKNOWN position. It is very likely that it points to the ll
111 header. PPP does this, which is wrong, because it introduces
112 asymmetry between the rx and tx paths.
113 data -> data
114
115Outgoing, dev->hard_header==NULL
116 mac_header -> data. ll header is still not built!
117 data -> data
118
119Summary
120 If dev->hard_header==NULL we are unlikely to restore a sensible ll header.
121
122
123On transmit:
124------------
125
126dev->hard_header != NULL
127 mac_header -> ll header
128 data -> ll header
129
130dev->hard_header == NULL (ll header is added by the device, we cannot control it)
131 mac_header -> data
132 data -> data
133
134 We should set nh.raw on output to the correct position;
135 the packet classifier depends on it.
136 */
137
138/* Private packet socket structures. */
139
140struct packet_mclist {
141 struct packet_mclist *next;
142 int ifindex;
143 int count;
144 unsigned short type;
145 unsigned short alen;
146 unsigned char addr[MAX_ADDR_LEN];
147};
148/* identical to struct packet_mreq except it has
149 * a longer address field.
150 */
151struct packet_mreq_max {
152 int mr_ifindex;
153 unsigned short mr_type;
154 unsigned short mr_alen;
155 unsigned char mr_address[MAX_ADDR_LEN];
156};
157
158#ifdef CONFIG_PACKET_MMAP
159static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
160 int closing, int tx_ring);
161
162struct packet_ring_buffer {
163 char **pg_vec;
164 unsigned int head;
165 unsigned int frames_per_block;
166 unsigned int frame_size;
167 unsigned int frame_max;
168
169 unsigned int pg_vec_order;
170 unsigned int pg_vec_pages;
171 unsigned int pg_vec_len;
172
173 atomic_t pending;
174};
175
176struct packet_sock;
177static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
178#endif
179
180static void packet_flush_mclist(struct sock *sk);
181
182struct packet_sock {
183 /* struct sock has to be the first member of packet_sock */
184 struct sock sk;
185 struct tpacket_stats stats;
186#ifdef CONFIG_PACKET_MMAP
187 struct packet_ring_buffer rx_ring;
188 struct packet_ring_buffer tx_ring;
189 int copy_thresh;
190#endif
191 struct packet_type prot_hook;
192 spinlock_t bind_lock;
193 struct mutex pg_vec_lock;
194 unsigned int running:1, /* prot_hook is attached*/
195 auxdata:1,
196 origdev:1;
197 int ifindex; /* bound device */
198 __be16 num;
199 struct packet_mclist *mclist;
200#ifdef CONFIG_PACKET_MMAP
201 atomic_t mapped;
202 enum tpacket_versions tp_version;
203 unsigned int tp_hdrlen;
204 unsigned int tp_reserve;
205 unsigned int tp_loss:1;
206#endif
207};
208
209struct packet_skb_cb {
210 unsigned int origlen;
211 union {
212 struct sockaddr_pkt pkt;
213 struct sockaddr_ll ll;
214 } sa;
215};
216
217#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
218
219#ifdef CONFIG_PACKET_MMAP
220
221static void __packet_set_status(struct packet_sock *po, void *frame, int status)
222{
223 union {
224 struct tpacket_hdr *h1;
225 struct tpacket2_hdr *h2;
226 void *raw;
227 } h;
228
229 h.raw = frame;
230 switch (po->tp_version) {
231 case TPACKET_V1:
232 h.h1->tp_status = status;
233 flush_dcache_page(virt_to_page(&h.h1->tp_status));
234 break;
235 case TPACKET_V2:
236 h.h2->tp_status = status;
237 flush_dcache_page(virt_to_page(&h.h2->tp_status));
238 break;
239 default:
240 pr_err("TPACKET version not supported\n");
241 BUG();
242 }
243
244 smp_wmb();
245}
246
247static int __packet_get_status(struct packet_sock *po, void *frame)
248{
249 union {
250 struct tpacket_hdr *h1;
251 struct tpacket2_hdr *h2;
252 void *raw;
253 } h;
254
255 smp_rmb();
256
257 h.raw = frame;
258 switch (po->tp_version) {
259 case TPACKET_V1:
260 flush_dcache_page(virt_to_page(&h.h1->tp_status));
261 return h.h1->tp_status;
262 case TPACKET_V2:
263 flush_dcache_page(virt_to_page(&h.h2->tp_status));
264 return h.h2->tp_status;
265 default:
266 pr_err("TPACKET version not supported\n");
267 BUG();
268 return 0;
269 }
270}
271
272static void *packet_lookup_frame(struct packet_sock *po,
273 struct packet_ring_buffer *rb,
274 unsigned int position,
275 int status)
276{
277 unsigned int pg_vec_pos, frame_offset;
278 union {
279 struct tpacket_hdr *h1;
280 struct tpacket2_hdr *h2;
281 void *raw;
282 } h;
283
284 pg_vec_pos = position / rb->frames_per_block;
285 frame_offset = position % rb->frames_per_block;
286
287 h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
288
289 if (status != __packet_get_status(po, h.raw))
290 return NULL;
291
292 return h.raw;
293}
294
295static inline void *packet_current_frame(struct packet_sock *po,
296 struct packet_ring_buffer *rb,
297 int status)
298{
299 return packet_lookup_frame(po, rb, rb->head, status);
300}
301
302static inline void *packet_previous_frame(struct packet_sock *po,
303 struct packet_ring_buffer *rb,
304 int status)
305{
306 unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
307 return packet_lookup_frame(po, rb, previous, status);
308}
309
310static inline void packet_increment_head(struct packet_ring_buffer *buff)
311{
312 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
313}
314
315#endif
316
317static inline struct packet_sock *pkt_sk(struct sock *sk)
318{
319 return (struct packet_sock *)sk;
320}
321
322static void packet_sock_destruct(struct sock *sk)
323{
324 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
325 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
326
327 if (!sock_flag(sk, SOCK_DEAD)) {
328 pr_err("Attempt to release alive packet socket: %p\n", sk);
329 return;
330 }
331
332 sk_refcnt_debug_dec(sk);
333}
334
335
336static const struct proto_ops packet_ops;
337
338static const struct proto_ops packet_ops_spkt;
339
340static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
341 struct packet_type *pt, struct net_device *orig_dev)
342{
343 struct sock *sk;
344 struct sockaddr_pkt *spkt;
345
346 /*
347 * When we registered the protocol we saved the socket in the data
348 * field for just this event.
349 */
350
351 sk = pt->af_packet_priv;
352
353 /*
354 * Yank back the headers [hope the device set this
355 * right or kerboom...]
356 *
357 * Incoming packets have the ll header pulled,
358 * push it back.
359 *
360 * For outgoing ones skb->data == skb_mac_header(skb)
361 * so that this procedure is a no-op.
362 */
363
364 if (skb->pkt_type == PACKET_LOOPBACK)
365 goto out;
366
367 if (dev_net(dev) != sock_net(sk))
368 goto out;
369
370 skb = skb_share_check(skb, GFP_ATOMIC);
371 if (skb == NULL)
372 goto oom;
373
374 /* drop any routing info */
375 skb_dst_drop(skb);
376
377 /* drop conntrack reference */
378 nf_reset(skb);
379
380 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
381
382 skb_push(skb, skb->data - skb_mac_header(skb));
383
384 /*
385 * The SOCK_PACKET socket receives _all_ frames.
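 * For example, a recvfrom() caller gets the metadata filled in below
 * back in its struct sockaddr_pkt: spkt_device "eth0", spkt_family
 * ARPHRD_ETHER and spkt_protocol htons(ETH_P_IP) for an IPv4 frame
 * that arrived on an Ethernet device.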
386 */ 387 388 spkt->spkt_family = dev->type; 389 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device)); 390 spkt->spkt_protocol = skb->protocol; 391 392 /* 393 * Charge the memory to the socket. This is done specifically 394 * to prevent sockets using all the memory up. 395 */ 396 397 if (sock_queue_rcv_skb(sk, skb) == 0) 398 return 0; 399 400out: 401 kfree_skb(skb); 402oom: 403 return 0; 404} 405 406 407/* 408 * Output a raw packet to a device layer. This bypasses all the other 409 * protocol layers and you must therefore supply it with a complete frame 410 */ 411 412static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, 413 struct msghdr *msg, size_t len) 414{ 415 struct sock *sk = sock->sk; 416 struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name; 417 struct sk_buff *skb; 418 struct net_device *dev; 419 __be16 proto = 0; 420 int err; 421 422 /* 423 * Get and verify the address. 424 */ 425 426 if (saddr) { 427 if (msg->msg_namelen < sizeof(struct sockaddr)) 428 return -EINVAL; 429 if (msg->msg_namelen == sizeof(struct sockaddr_pkt)) 430 proto = saddr->spkt_protocol; 431 } else 432 return -ENOTCONN; /* SOCK_PACKET must be sent giving an address */ 433 434 /* 435 * Find the device first to size check it 436 */ 437 438 saddr->spkt_device[13] = 0; 439 dev = dev_get_by_name(sock_net(sk), saddr->spkt_device); 440 err = -ENODEV; 441 if (dev == NULL) 442 goto out_unlock; 443 444 err = -ENETDOWN; 445 if (!(dev->flags & IFF_UP)) 446 goto out_unlock; 447 448 /* 449 * You may not queue a frame bigger than the mtu. This is the lowest level 450 * raw protocol and you must do your own fragmentation at this level. 451 */ 452 453 err = -EMSGSIZE; 454 if (len > dev->mtu + dev->hard_header_len) 455 goto out_unlock; 456 457 err = -ENOBUFS; 458 skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL); 459 460 /* 461 * If the write buffer is full, then tough. At this level the user 462 * gets to deal with the problem - do your own algorithmic backoffs. 463 * That's far more flexible. 464 */ 465 466 if (skb == NULL) 467 goto out_unlock; 468 469 /* 470 * Fill it in 471 */ 472 473 /* FIXME: Save some space for broken drivers that write a 474 * hard header at transmission time by themselves. PPP is the 475 * notable one here. This should really be fixed at the driver level. 
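 * As a sketch of the layout being set up: skb_reserve() below leaves
 * skb->data LL_RESERVED_SPACE(dev) bytes into the buffer, and since a
 * SOCK_PACKET sender supplies the link-layer header itself,
 * skb->data/skb->tail are then walked back by hard_header_len so the
 * user-supplied frame starts where the ll header belongs.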
476 */
477 skb_reserve(skb, LL_RESERVED_SPACE(dev));
478 skb_reset_network_header(skb);
479
480 /* Try to align data part correctly */
481 if (dev->header_ops) {
482 skb->data -= dev->hard_header_len;
483 skb->tail -= dev->hard_header_len;
484 if (len < dev->hard_header_len)
485 skb_reset_network_header(skb);
486 }
487
488 /* Returns -EFAULT on error */
489 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
490 skb->protocol = proto;
491 skb->dev = dev;
492 skb->priority = sk->sk_priority;
493 skb->mark = sk->sk_mark;
494 if (err)
495 goto out_free;
496
497 /*
498 * Now send it
499 */
500
501 dev_queue_xmit(skb);
502 dev_put(dev);
503 return len;
504
505out_free:
506 kfree_skb(skb);
507out_unlock:
508 if (dev)
509 dev_put(dev);
510 return err;
511}
512
513static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
514 unsigned int res)
515{
516 struct sk_filter *filter;
517
518 rcu_read_lock_bh();
519 filter = rcu_dereference(sk->sk_filter);
520 if (filter != NULL)
521 res = sk_run_filter(skb, filter->insns, filter->len);
522 rcu_read_unlock_bh();
523
524 return res;
525}
526
527/*
528 * If we've lost frames since the last time we queued one to the
529 * sk_receive_queue, we need to record it here.
530 * This must be called under the protection of the socket lock
531 * to prevent racing with other softirqs and user space.
532 */
533static inline void record_packet_gap(struct sk_buff *skb,
534 struct packet_sock *po)
535{
536 /*
537 * We overload the mark field here, since we're about
538 * to enqueue to a receive queue and nobody else will
539 * use this field at this point
540 */
541 skb->mark = po->stats.tp_gap;
542 po->stats.tp_gap = 0;
543 return;
544
545}
546
547static inline __u32 check_packet_gap(struct sk_buff *skb)
548{
549 return skb->mark;
550}
551
552/*
553 This function performs lazy skb cloning, in the hope that most packets
554 are discarded by BPF.
555
556 Note the tricky part: we DO mangle the shared skb! skb->data, skb->len
557 and skb->cb are mangled. It works because (and until) packets
558 falling here are owned by the current CPU. Output packets are cloned
559 by dev_queue_xmit_nit(), input packets are processed by net_bh
560 sequentially, so that if we return the skb to its original state on exit,
561 we will not harm anyone.
562 */
563
564static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
565 struct packet_type *pt, struct net_device *orig_dev)
566{
567 struct sock *sk;
568 struct sockaddr_ll *sll;
569 struct packet_sock *po;
570 u8 *skb_head = skb->data;
571 int skb_len = skb->len;
572 unsigned int snaplen, res;
573
574 if (skb->pkt_type == PACKET_LOOPBACK)
575 goto drop;
576
577 sk = pt->af_packet_priv;
578 po = pkt_sk(sk);
579
580 if (dev_net(dev) != sock_net(sk))
581 goto drop;
582
583 skb->dev = dev;
584
585 if (dev->header_ops) {
586 /* The device has an explicit notion of ll header,
587 exported to higher levels.
588
589 Otherwise, the device hides the details of its frame
590 structure, so the corresponding packet header is
591 never delivered to the user.
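	   Concretely, below: SOCK_RAW gets the pulled ll header pushed
	   back in front of skb->data, while SOCK_DGRAM sees an outgoing
	   frame (whose ll header is still at the head) pulled forward
	   to the network header, so rx and tx deliver the same view.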
592 */ 593 if (sk->sk_type != SOCK_DGRAM) 594 skb_push(skb, skb->data - skb_mac_header(skb)); 595 else if (skb->pkt_type == PACKET_OUTGOING) { 596 /* Special case: outgoing packets have ll header at head */ 597 skb_pull(skb, skb_network_offset(skb)); 598 } 599 } 600 601 snaplen = skb->len; 602 603 res = run_filter(skb, sk, snaplen); 604 if (!res) 605 goto drop_n_restore; 606 if (snaplen > res) 607 snaplen = res; 608 609 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= 610 (unsigned)sk->sk_rcvbuf) 611 goto drop_n_acct; 612 613 if (skb_shared(skb)) { 614 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); 615 if (nskb == NULL) 616 goto drop_n_acct; 617 618 if (skb_head != skb->data) { 619 skb->data = skb_head; 620 skb->len = skb_len; 621 } 622 kfree_skb(skb); 623 skb = nskb; 624 } 625 626 BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 > 627 sizeof(skb->cb)); 628 629 sll = &PACKET_SKB_CB(skb)->sa.ll; 630 sll->sll_family = AF_PACKET; 631 sll->sll_hatype = dev->type; 632 sll->sll_protocol = skb->protocol; 633 sll->sll_pkttype = skb->pkt_type; 634 if (unlikely(po->origdev)) 635 sll->sll_ifindex = orig_dev->ifindex; 636 else 637 sll->sll_ifindex = dev->ifindex; 638 639 sll->sll_halen = dev_parse_header(skb, sll->sll_addr); 640 641 PACKET_SKB_CB(skb)->origlen = skb->len; 642 643 if (pskb_trim(skb, snaplen)) 644 goto drop_n_acct; 645 646 skb_set_owner_r(skb, sk); 647 skb->dev = NULL; 648 skb_dst_drop(skb); 649 650 /* drop conntrack reference */ 651 nf_reset(skb); 652 653 spin_lock(&sk->sk_receive_queue.lock); 654 po->stats.tp_packets++; 655 record_packet_gap(skb, po); 656 __skb_queue_tail(&sk->sk_receive_queue, skb); 657 spin_unlock(&sk->sk_receive_queue.lock); 658 sk->sk_data_ready(sk, skb->len); 659 return 0; 660 661drop_n_acct: 662 spin_lock(&sk->sk_receive_queue.lock); 663 po->stats.tp_drops++; 664 po->stats.tp_gap++; 665 spin_unlock(&sk->sk_receive_queue.lock); 666 667drop_n_restore: 668 if (skb_head != skb->data && skb_shared(skb)) { 669 skb->data = skb_head; 670 skb->len = skb_len; 671 } 672drop: 673 consume_skb(skb); 674 return 0; 675} 676 677#ifdef CONFIG_PACKET_MMAP 678static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, 679 struct packet_type *pt, struct net_device *orig_dev) 680{ 681 struct sock *sk; 682 struct packet_sock *po; 683 struct sockaddr_ll *sll; 684 union { 685 struct tpacket_hdr *h1; 686 struct tpacket2_hdr *h2; 687 void *raw; 688 } h; 689 u8 *skb_head = skb->data; 690 int skb_len = skb->len; 691 unsigned int snaplen, res; 692 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER; 693 unsigned short macoff, netoff, hdrlen; 694 struct sk_buff *copy_skb = NULL; 695 struct timeval tv; 696 struct timespec ts; 697 698 if (skb->pkt_type == PACKET_LOOPBACK) 699 goto drop; 700 701 sk = pt->af_packet_priv; 702 po = pkt_sk(sk); 703 704 if (dev_net(dev) != sock_net(sk)) 705 goto drop; 706 707 if (dev->header_ops) { 708 if (sk->sk_type != SOCK_DGRAM) 709 skb_push(skb, skb->data - skb_mac_header(skb)); 710 else if (skb->pkt_type == PACKET_OUTGOING) { 711 /* Special case: outgoing packets have ll header at head */ 712 skb_pull(skb, skb_network_offset(skb)); 713 } 714 } 715 716 if (skb->ip_summed == CHECKSUM_PARTIAL) 717 status |= TP_STATUS_CSUMNOTREADY; 718 719 snaplen = skb->len; 720 721 res = run_filter(skb, sk, snaplen); 722 if (!res) 723 goto drop_n_restore; 724 if (snaplen > res) 725 snaplen = res; 726 727 if (sk->sk_type == SOCK_DGRAM) { 728 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 + 729 po->tp_reserve; 730 } else { 731 unsigned maclen = 
skb_network_offset(skb); 732 netoff = TPACKET_ALIGN(po->tp_hdrlen + 733 (maclen < 16 ? 16 : maclen)) + 734 po->tp_reserve; 735 macoff = netoff - maclen; 736 } 737 738 if (macoff + snaplen > po->rx_ring.frame_size) { 739 if (po->copy_thresh && 740 atomic_read(&sk->sk_rmem_alloc) + skb->truesize < 741 (unsigned)sk->sk_rcvbuf) { 742 if (skb_shared(skb)) { 743 copy_skb = skb_clone(skb, GFP_ATOMIC); 744 } else { 745 copy_skb = skb_get(skb); 746 skb_head = skb->data; 747 } 748 if (copy_skb) 749 skb_set_owner_r(copy_skb, sk); 750 } 751 snaplen = po->rx_ring.frame_size - macoff; 752 if ((int)snaplen < 0) 753 snaplen = 0; 754 } 755 756 spin_lock(&sk->sk_receive_queue.lock); 757 h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL); 758 if (!h.raw) 759 goto ring_is_full; 760 packet_increment_head(&po->rx_ring); 761 po->stats.tp_packets++; 762 if (copy_skb) { 763 status |= TP_STATUS_COPY; 764 __skb_queue_tail(&sk->sk_receive_queue, copy_skb); 765 } 766 if (!po->stats.tp_drops) 767 status &= ~TP_STATUS_LOSING; 768 spin_unlock(&sk->sk_receive_queue.lock); 769 770 skb_copy_bits(skb, 0, h.raw + macoff, snaplen); 771 772 switch (po->tp_version) { 773 case TPACKET_V1: 774 h.h1->tp_len = skb->len; 775 h.h1->tp_snaplen = snaplen; 776 h.h1->tp_mac = macoff; 777 h.h1->tp_net = netoff; 778 if (skb->tstamp.tv64) 779 tv = ktime_to_timeval(skb->tstamp); 780 else 781 do_gettimeofday(&tv); 782 h.h1->tp_sec = tv.tv_sec; 783 h.h1->tp_usec = tv.tv_usec; 784 hdrlen = sizeof(*h.h1); 785 break; 786 case TPACKET_V2: 787 h.h2->tp_len = skb->len; 788 h.h2->tp_snaplen = snaplen; 789 h.h2->tp_mac = macoff; 790 h.h2->tp_net = netoff; 791 if (skb->tstamp.tv64) 792 ts = ktime_to_timespec(skb->tstamp); 793 else 794 getnstimeofday(&ts); 795 h.h2->tp_sec = ts.tv_sec; 796 h.h2->tp_nsec = ts.tv_nsec; 797 h.h2->tp_vlan_tci = skb->vlan_tci; 798 hdrlen = sizeof(*h.h2); 799 break; 800 default: 801 BUG(); 802 } 803 804 sll = h.raw + TPACKET_ALIGN(hdrlen); 805 sll->sll_halen = dev_parse_header(skb, sll->sll_addr); 806 sll->sll_family = AF_PACKET; 807 sll->sll_hatype = dev->type; 808 sll->sll_protocol = skb->protocol; 809 sll->sll_pkttype = skb->pkt_type; 810 if (unlikely(po->origdev)) 811 sll->sll_ifindex = orig_dev->ifindex; 812 else 813 sll->sll_ifindex = dev->ifindex; 814 815 __packet_set_status(po, h.raw, status); 816 smp_mb(); 817 { 818 struct page *p_start, *p_end; 819 u8 *h_end = h.raw + macoff + snaplen - 1; 820 821 p_start = virt_to_page(h.raw); 822 p_end = virt_to_page(h_end); 823 while (p_start <= p_end) { 824 flush_dcache_page(p_start); 825 p_start++; 826 } 827 } 828 829 sk->sk_data_ready(sk, 0); 830 831drop_n_restore: 832 if (skb_head != skb->data && skb_shared(skb)) { 833 skb->data = skb_head; 834 skb->len = skb_len; 835 } 836drop: 837 kfree_skb(skb); 838 return 0; 839 840ring_is_full: 841 po->stats.tp_drops++; 842 po->stats.tp_gap++; 843 spin_unlock(&sk->sk_receive_queue.lock); 844 845 sk->sk_data_ready(sk, 0); 846 kfree_skb(copy_skb); 847 goto drop_n_restore; 848} 849 850static void tpacket_destruct_skb(struct sk_buff *skb) 851{ 852 struct packet_sock *po = pkt_sk(skb->sk); 853 void *ph; 854 855 BUG_ON(skb == NULL); 856 857 if (likely(po->tx_ring.pg_vec)) { 858 ph = skb_shinfo(skb)->destructor_arg; 859 BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); 860 BUG_ON(atomic_read(&po->tx_ring.pending) == 0); 861 atomic_dec(&po->tx_ring.pending); 862 __packet_set_status(po, ph, TP_STATUS_AVAILABLE); 863 } 864 865 sock_wfree(skb); 866} 867 868static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, 
869 void *frame, struct net_device *dev, int size_max,
870 __be16 proto, unsigned char *addr)
871{
872 union {
873 struct tpacket_hdr *h1;
874 struct tpacket2_hdr *h2;
875 void *raw;
876 } ph;
877 int to_write, offset, len, tp_len, nr_frags, len_max;
878 struct socket *sock = po->sk.sk_socket;
879 struct page *page;
880 void *data;
881 int err;
882
883 ph.raw = frame;
884
885 skb->protocol = proto;
886 skb->dev = dev;
887 skb->priority = po->sk.sk_priority;
888 skb->mark = po->sk.sk_mark;
889 skb_shinfo(skb)->destructor_arg = ph.raw;
890
891 switch (po->tp_version) {
892 case TPACKET_V2:
893 tp_len = ph.h2->tp_len;
894 break;
895 default:
896 tp_len = ph.h1->tp_len;
897 break;
898 }
899 if (unlikely(tp_len > size_max)) {
900 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
901 return -EMSGSIZE;
902 }
903
904 skb_reserve(skb, LL_RESERVED_SPACE(dev));
905 skb_reset_network_header(skb);
906
907 data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
908 to_write = tp_len;
909
910 if (sock->type == SOCK_DGRAM) {
911 err = dev_hard_header(skb, dev, ntohs(proto), addr,
912 NULL, tp_len);
913 if (unlikely(err < 0))
914 return -EINVAL;
915 } else if (dev->hard_header_len) {
916 /* net device doesn't like empty head */
917 if (unlikely(tp_len <= dev->hard_header_len)) {
918 pr_err("packet size is too short (%d <= %d)\n",
919 tp_len, dev->hard_header_len);
920 return -EINVAL;
921 }
922
923 skb_push(skb, dev->hard_header_len);
924 err = skb_store_bits(skb, 0, data,
925 dev->hard_header_len);
926 if (unlikely(err))
927 return err;
928
929 data += dev->hard_header_len;
930 to_write -= dev->hard_header_len;
931 }
932
933 err = -EFAULT;
934 page = virt_to_page(data);
935 offset = offset_in_page(data);
936 len_max = PAGE_SIZE - offset;
937 len = ((to_write > len_max) ? len_max : to_write);
938
939 skb->data_len = to_write;
940 skb->len += to_write;
941 skb->truesize += to_write;
942 atomic_add(to_write, &po->sk.sk_wmem_alloc);
943
944 while (likely(to_write)) {
945 nr_frags = skb_shinfo(skb)->nr_frags;
946
947 if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
948 pr_err("Packet exceeds the number of skb frags (%lu)\n",
949 MAX_SKB_FRAGS);
950 return -EFAULT;
951 }
952
953 flush_dcache_page(page);
954 get_page(page);
955 skb_fill_page_desc(skb,
956 nr_frags,
957 page++, offset, len);
958 to_write -= len;
959 offset = 0;
960 len_max = PAGE_SIZE;
961 len = ((to_write > len_max) ?
len_max : to_write); 962 } 963 964 return tp_len; 965} 966 967static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) 968{ 969 struct socket *sock; 970 struct sk_buff *skb; 971 struct net_device *dev; 972 __be16 proto; 973 int ifindex, err, reserve = 0; 974 void *ph; 975 struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; 976 int tp_len, size_max; 977 unsigned char *addr; 978 int len_sum = 0; 979 int status = 0; 980 981 sock = po->sk.sk_socket; 982 983 mutex_lock(&po->pg_vec_lock); 984 985 err = -EBUSY; 986 if (saddr == NULL) { 987 ifindex = po->ifindex; 988 proto = po->num; 989 addr = NULL; 990 } else { 991 err = -EINVAL; 992 if (msg->msg_namelen < sizeof(struct sockaddr_ll)) 993 goto out; 994 if (msg->msg_namelen < (saddr->sll_halen 995 + offsetof(struct sockaddr_ll, 996 sll_addr))) 997 goto out; 998 ifindex = saddr->sll_ifindex; 999 proto = saddr->sll_protocol; 1000 addr = saddr->sll_addr; 1001 } 1002 1003 dev = dev_get_by_index(sock_net(&po->sk), ifindex); 1004 err = -ENXIO; 1005 if (unlikely(dev == NULL)) 1006 goto out; 1007 1008 reserve = dev->hard_header_len; 1009 1010 err = -ENETDOWN; 1011 if (unlikely(!(dev->flags & IFF_UP))) 1012 goto out_put; 1013 1014 size_max = po->tx_ring.frame_size 1015 - sizeof(struct skb_shared_info) 1016 - po->tp_hdrlen 1017 - LL_ALLOCATED_SPACE(dev) 1018 - sizeof(struct sockaddr_ll); 1019 1020 if (size_max > dev->mtu + reserve) 1021 size_max = dev->mtu + reserve; 1022 1023 do { 1024 ph = packet_current_frame(po, &po->tx_ring, 1025 TP_STATUS_SEND_REQUEST); 1026 1027 if (unlikely(ph == NULL)) { 1028 schedule(); 1029 continue; 1030 } 1031 1032 status = TP_STATUS_SEND_REQUEST; 1033 skb = sock_alloc_send_skb(&po->sk, 1034 LL_ALLOCATED_SPACE(dev) 1035 + sizeof(struct sockaddr_ll), 1036 0, &err); 1037 1038 if (unlikely(skb == NULL)) 1039 goto out_status; 1040 1041 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, 1042 addr); 1043 1044 if (unlikely(tp_len < 0)) { 1045 if (po->tp_loss) { 1046 __packet_set_status(po, ph, 1047 TP_STATUS_AVAILABLE); 1048 packet_increment_head(&po->tx_ring); 1049 kfree_skb(skb); 1050 continue; 1051 } else { 1052 status = TP_STATUS_WRONG_FORMAT; 1053 err = tp_len; 1054 goto out_status; 1055 } 1056 } 1057 1058 skb->destructor = tpacket_destruct_skb; 1059 __packet_set_status(po, ph, TP_STATUS_SENDING); 1060 atomic_inc(&po->tx_ring.pending); 1061 1062 status = TP_STATUS_SEND_REQUEST; 1063 err = dev_queue_xmit(skb); 1064 if (unlikely(err > 0 && (err = net_xmit_errno(err)) != 0)) 1065 goto out_xmit; 1066 packet_increment_head(&po->tx_ring); 1067 len_sum += tp_len; 1068 } while (likely((ph != NULL) || ((!(msg->msg_flags & MSG_DONTWAIT)) 1069 && (atomic_read(&po->tx_ring.pending)))) 1070 ); 1071 1072 err = len_sum; 1073 goto out_put; 1074 1075out_xmit: 1076 skb->destructor = sock_wfree; 1077 atomic_dec(&po->tx_ring.pending); 1078out_status: 1079 __packet_set_status(po, ph, status); 1080 kfree_skb(skb); 1081out_put: 1082 dev_put(dev); 1083out: 1084 mutex_unlock(&po->pg_vec_lock); 1085 return err; 1086} 1087#endif 1088 1089static int packet_snd(struct socket *sock, 1090 struct msghdr *msg, size_t len) 1091{ 1092 struct sock *sk = sock->sk; 1093 struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; 1094 struct sk_buff *skb; 1095 struct net_device *dev; 1096 __be16 proto; 1097 unsigned char *addr; 1098 int ifindex, err, reserve = 0; 1099 1100 /* 1101 * Get and verify the address. 
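 * For example, a SOCK_DGRAM sender fills struct sockaddr_ll with
 * sll_family AF_PACKET, the target sll_ifindex, a protocol such as
 * htons(ETH_P_IP) in sll_protocol, and the destination MAC in
 * sll_addr/sll_halen; when no address is supplied, the ifindex and
 * protocol the socket was bound with are used instead.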
1102 */ 1103 1104 if (saddr == NULL) { 1105 struct packet_sock *po = pkt_sk(sk); 1106 1107 ifindex = po->ifindex; 1108 proto = po->num; 1109 addr = NULL; 1110 } else { 1111 err = -EINVAL; 1112 if (msg->msg_namelen < sizeof(struct sockaddr_ll)) 1113 goto out; 1114 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) 1115 goto out; 1116 ifindex = saddr->sll_ifindex; 1117 proto = saddr->sll_protocol; 1118 addr = saddr->sll_addr; 1119 } 1120 1121 1122 dev = dev_get_by_index(sock_net(sk), ifindex); 1123 err = -ENXIO; 1124 if (dev == NULL) 1125 goto out_unlock; 1126 if (sock->type == SOCK_RAW) 1127 reserve = dev->hard_header_len; 1128 1129 err = -ENETDOWN; 1130 if (!(dev->flags & IFF_UP)) 1131 goto out_unlock; 1132 1133 err = -EMSGSIZE; 1134 if (len > dev->mtu+reserve) 1135 goto out_unlock; 1136 1137 skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev), 1138 msg->msg_flags & MSG_DONTWAIT, &err); 1139 if (skb == NULL) 1140 goto out_unlock; 1141 1142 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 1143 skb_reset_network_header(skb); 1144 1145 err = -EINVAL; 1146 if (sock->type == SOCK_DGRAM && 1147 dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0) 1148 goto out_free; 1149 1150 /* Returns -EFAULT on error */ 1151 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); 1152 if (err) 1153 goto out_free; 1154 1155 skb->protocol = proto; 1156 skb->dev = dev; 1157 skb->priority = sk->sk_priority; 1158 skb->mark = sk->sk_mark; 1159 1160 /* 1161 * Now send it 1162 */ 1163 1164 err = dev_queue_xmit(skb); 1165 if (err > 0 && (err = net_xmit_errno(err)) != 0) 1166 goto out_unlock; 1167 1168 dev_put(dev); 1169 1170 return len; 1171 1172out_free: 1173 kfree_skb(skb); 1174out_unlock: 1175 if (dev) 1176 dev_put(dev); 1177out: 1178 return err; 1179} 1180 1181static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, 1182 struct msghdr *msg, size_t len) 1183{ 1184#ifdef CONFIG_PACKET_MMAP 1185 struct sock *sk = sock->sk; 1186 struct packet_sock *po = pkt_sk(sk); 1187 if (po->tx_ring.pg_vec) 1188 return tpacket_snd(po, msg); 1189 else 1190#endif 1191 return packet_snd(sock, msg, len); 1192} 1193 1194/* 1195 * Close a PACKET socket. This is fairly simple. We immediately go 1196 * to 'closed' state and remove our protocol entry in the device list. 1197 */ 1198 1199static int packet_release(struct socket *sock) 1200{ 1201 struct sock *sk = sock->sk; 1202 struct packet_sock *po; 1203 struct net *net; 1204#ifdef CONFIG_PACKET_MMAP 1205 struct tpacket_req req; 1206#endif 1207 1208 if (!sk) 1209 return 0; 1210 1211 net = sock_net(sk); 1212 po = pkt_sk(sk); 1213 1214 write_lock_bh(&net->packet.sklist_lock); 1215 sk_del_node_init(sk); 1216 sock_prot_inuse_add(net, sk->sk_prot, -1); 1217 write_unlock_bh(&net->packet.sklist_lock); 1218 1219 /* 1220 * Unhook packet receive handler. 1221 */ 1222 1223 if (po->running) { 1224 /* 1225 * Remove the protocol hook 1226 */ 1227 dev_remove_pack(&po->prot_hook); 1228 po->running = 0; 1229 po->num = 0; 1230 __sock_put(sk); 1231 } 1232 1233 packet_flush_mclist(sk); 1234 1235#ifdef CONFIG_PACKET_MMAP 1236 memset(&req, 0, sizeof(req)); 1237 1238 if (po->rx_ring.pg_vec) 1239 packet_set_ring(sk, &req, 1, 0); 1240 1241 if (po->tx_ring.pg_vec) 1242 packet_set_ring(sk, &req, 1, 1); 1243#endif 1244 1245 /* 1246 * Now the socket is dead. No more input will appear. 
1247 */ 1248 1249 sock_orphan(sk); 1250 sock->sk = NULL; 1251 1252 /* Purge queues */ 1253 1254 skb_queue_purge(&sk->sk_receive_queue); 1255 sk_refcnt_debug_release(sk); 1256 1257 sock_put(sk); 1258 return 0; 1259} 1260 1261/* 1262 * Attach a packet hook. 1263 */ 1264 1265static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol) 1266{ 1267 struct packet_sock *po = pkt_sk(sk); 1268 /* 1269 * Detach an existing hook if present. 1270 */ 1271 1272 lock_sock(sk); 1273 1274 spin_lock(&po->bind_lock); 1275 if (po->running) { 1276 __sock_put(sk); 1277 po->running = 0; 1278 po->num = 0; 1279 spin_unlock(&po->bind_lock); 1280 dev_remove_pack(&po->prot_hook); 1281 spin_lock(&po->bind_lock); 1282 } 1283 1284 po->num = protocol; 1285 po->prot_hook.type = protocol; 1286 po->prot_hook.dev = dev; 1287 1288 po->ifindex = dev ? dev->ifindex : 0; 1289 1290 if (protocol == 0) 1291 goto out_unlock; 1292 1293 if (!dev || (dev->flags & IFF_UP)) { 1294 dev_add_pack(&po->prot_hook); 1295 sock_hold(sk); 1296 po->running = 1; 1297 } else { 1298 sk->sk_err = ENETDOWN; 1299 if (!sock_flag(sk, SOCK_DEAD)) 1300 sk->sk_error_report(sk); 1301 } 1302 1303out_unlock: 1304 spin_unlock(&po->bind_lock); 1305 release_sock(sk); 1306 return 0; 1307} 1308 1309/* 1310 * Bind a packet socket to a device 1311 */ 1312 1313static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, 1314 int addr_len) 1315{ 1316 struct sock *sk = sock->sk; 1317 char name[15]; 1318 struct net_device *dev; 1319 int err = -ENODEV; 1320 1321 /* 1322 * Check legality 1323 */ 1324 1325 if (addr_len != sizeof(struct sockaddr)) 1326 return -EINVAL; 1327 strlcpy(name, uaddr->sa_data, sizeof(name)); 1328 1329 dev = dev_get_by_name(sock_net(sk), name); 1330 if (dev) { 1331 err = packet_do_bind(sk, dev, pkt_sk(sk)->num); 1332 dev_put(dev); 1333 } 1334 return err; 1335} 1336 1337static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 1338{ 1339 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; 1340 struct sock *sk = sock->sk; 1341 struct net_device *dev = NULL; 1342 int err; 1343 1344 1345 /* 1346 * Check legality 1347 */ 1348 1349 if (addr_len < sizeof(struct sockaddr_ll)) 1350 return -EINVAL; 1351 if (sll->sll_family != AF_PACKET) 1352 return -EINVAL; 1353 1354 if (sll->sll_ifindex) { 1355 err = -ENODEV; 1356 dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex); 1357 if (dev == NULL) 1358 goto out; 1359 } 1360 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num); 1361 if (dev) 1362 dev_put(dev); 1363 1364out: 1365 return err; 1366} 1367 1368static struct proto packet_proto = { 1369 .name = "PACKET", 1370 .owner = THIS_MODULE, 1371 .obj_size = sizeof(struct packet_sock), 1372}; 1373 1374/* 1375 * Create a packet of type SOCK_PACKET. 
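 * (This also handles SOCK_RAW and SOCK_DGRAM creation. For example,
 * socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)) lands here with the
 * protocol already in network byte order, hence the __force cast
 * below, and requires CAP_NET_RAW.)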
1376 */
1377
1378static int packet_create(struct net *net, struct socket *sock, int protocol)
1379{
1380 struct sock *sk;
1381 struct packet_sock *po;
1382 __be16 proto = (__force __be16)protocol; /* weird, but documented */
1383 int err;
1384
1385 if (!capable(CAP_NET_RAW))
1386 return -EPERM;
1387 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
1388 sock->type != SOCK_PACKET)
1389 return -ESOCKTNOSUPPORT;
1390
1391 sock->state = SS_UNCONNECTED;
1392
1393 err = -ENOBUFS;
1394 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
1395 if (sk == NULL)
1396 goto out;
1397
1398 sock->ops = &packet_ops;
1399 if (sock->type == SOCK_PACKET)
1400 sock->ops = &packet_ops_spkt;
1401
1402 sock_init_data(sock, sk);
1403
1404 po = pkt_sk(sk);
1405 sk->sk_family = PF_PACKET;
1406 po->num = proto;
1407
1408 sk->sk_destruct = packet_sock_destruct;
1409 sk_refcnt_debug_inc(sk);
1410
1411 /*
1412 * Attach a protocol block
1413 */
1414
1415 spin_lock_init(&po->bind_lock);
1416 mutex_init(&po->pg_vec_lock);
1417 po->prot_hook.func = packet_rcv;
1418
1419 if (sock->type == SOCK_PACKET)
1420 po->prot_hook.func = packet_rcv_spkt;
1421
1422 po->prot_hook.af_packet_priv = sk;
1423
1424 if (proto) {
1425 po->prot_hook.type = proto;
1426 dev_add_pack(&po->prot_hook);
1427 sock_hold(sk);
1428 po->running = 1;
1429 }
1430
1431 write_lock_bh(&net->packet.sklist_lock);
1432 sk_add_node(sk, &net->packet.sklist);
1433 sock_prot_inuse_add(net, &packet_proto, 1);
1434 write_unlock_bh(&net->packet.sklist_lock);
1435 return 0;
1436out:
1437 return err;
1438}
1439
1440/*
1441 * Pull a packet from our receive queue and hand it to the user.
1442 * If necessary we block.
1443 */
1444
1445static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1446 struct msghdr *msg, size_t len, int flags)
1447{
1448 struct sock *sk = sock->sk;
1449 struct sk_buff *skb;
1450 int copied, err;
1451 struct sockaddr_ll *sll;
1452 __u32 gap;
1453
1454 err = -EINVAL;
1455 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1456 goto out;
1457
1458#if 0
1459 /* What error should we return now? EUNATTACH? */
1460 if (pkt_sk(sk)->ifindex < 0)
1461 return -ENODEV;
1462#endif
1463
1464 /*
1465 * Call the generic datagram receiver. This handles all sorts
1466 * of horrible races and re-entrancy so we can forget about it
1467 * in the protocol layers.
1468 *
1469 * Now it will return ENETDOWN if the device has just gone down,
1470 * but then it will block.
1471 */
1472
1473 skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
1474
1475 /*
1476 * An error occurred, so return it. Because skb_recv_datagram()
1477 * handles the blocking we don't need to see or worry about blocking
1478 * retries.
1479 */
1480
1481 if (skb == NULL)
1482 goto out;
1483
1484 /*
1485 * If the address length field is there to be filled in, we fill
1486 * it in now.
1487 */
1488
1489 sll = &PACKET_SKB_CB(skb)->sa.ll;
1490 if (sock->type == SOCK_PACKET)
1491 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1492 else
1493 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1494
1495 /*
1496 * You lose any data beyond the buffer you gave. If it worries a
1497 * user program, it can ask the device for its MTU anyway.
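 * (A caller can detect the truncation by passing MSG_TRUNC in flags,
 * which makes the return value the original skb->len rather than the
 * number of bytes copied.)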
1498 */ 1499 1500 copied = skb->len; 1501 if (copied > len) { 1502 copied = len; 1503 msg->msg_flags |= MSG_TRUNC; 1504 } 1505 1506 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 1507 if (err) 1508 goto out_free; 1509 1510 sock_recv_timestamp(msg, sk, skb); 1511 1512 if (msg->msg_name) 1513 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, 1514 msg->msg_namelen); 1515 1516 if (pkt_sk(sk)->auxdata) { 1517 struct tpacket_auxdata aux; 1518 1519 aux.tp_status = TP_STATUS_USER; 1520 if (skb->ip_summed == CHECKSUM_PARTIAL) 1521 aux.tp_status |= TP_STATUS_CSUMNOTREADY; 1522 aux.tp_len = PACKET_SKB_CB(skb)->origlen; 1523 aux.tp_snaplen = skb->len; 1524 aux.tp_mac = 0; 1525 aux.tp_net = skb_network_offset(skb); 1526 aux.tp_vlan_tci = skb->vlan_tci; 1527 1528 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); 1529 } 1530 1531 gap = check_packet_gap(skb); 1532 if (gap) 1533 put_cmsg(msg, SOL_PACKET, PACKET_GAPDATA, sizeof(__u32), &gap); 1534 1535 /* 1536 * Free or return the buffer as appropriate. Again this 1537 * hides all the races and re-entrancy issues from us. 1538 */ 1539 err = (flags&MSG_TRUNC) ? skb->len : copied; 1540 1541out_free: 1542 skb_free_datagram(sk, skb); 1543out: 1544 return err; 1545} 1546 1547static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, 1548 int *uaddr_len, int peer) 1549{ 1550 struct net_device *dev; 1551 struct sock *sk = sock->sk; 1552 1553 if (peer) 1554 return -EOPNOTSUPP; 1555 1556 uaddr->sa_family = AF_PACKET; 1557 dev = dev_get_by_index(sock_net(sk), pkt_sk(sk)->ifindex); 1558 if (dev) { 1559 strlcpy(uaddr->sa_data, dev->name, 15); 1560 dev_put(dev); 1561 } else 1562 memset(uaddr->sa_data, 0, 14); 1563 *uaddr_len = sizeof(*uaddr); 1564 1565 return 0; 1566} 1567 1568static int packet_getname(struct socket *sock, struct sockaddr *uaddr, 1569 int *uaddr_len, int peer) 1570{ 1571 struct net_device *dev; 1572 struct sock *sk = sock->sk; 1573 struct packet_sock *po = pkt_sk(sk); 1574 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; 1575 1576 if (peer) 1577 return -EOPNOTSUPP; 1578 1579 sll->sll_family = AF_PACKET; 1580 sll->sll_ifindex = po->ifindex; 1581 sll->sll_protocol = po->num; 1582 dev = dev_get_by_index(sock_net(sk), po->ifindex); 1583 if (dev) { 1584 sll->sll_hatype = dev->type; 1585 sll->sll_halen = dev->addr_len; 1586 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len); 1587 dev_put(dev); 1588 } else { 1589 sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ 1590 sll->sll_halen = 0; 1591 } 1592 *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; 1593 1594 return 0; 1595} 1596 1597static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, 1598 int what) 1599{ 1600 switch (i->type) { 1601 case PACKET_MR_MULTICAST: 1602 if (what > 0) 1603 return dev_mc_add(dev, i->addr, i->alen, 0); 1604 else 1605 return dev_mc_delete(dev, i->addr, i->alen, 0); 1606 break; 1607 case PACKET_MR_PROMISC: 1608 return dev_set_promiscuity(dev, what); 1609 break; 1610 case PACKET_MR_ALLMULTI: 1611 return dev_set_allmulti(dev, what); 1612 break; 1613 case PACKET_MR_UNICAST: 1614 if (what > 0) 1615 return dev_unicast_add(dev, i->addr); 1616 else 1617 return dev_unicast_delete(dev, i->addr); 1618 break; 1619 default: 1620 break; 1621 } 1622 return 0; 1623} 1624 1625static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what) 1626{ 1627 for ( ; i; i = i->next) { 1628 if (i->ifindex == dev->ifindex) 1629 packet_dev_mc(dev, i, what); 1630 } 1631} 1632 1633static int 
packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) 1634{ 1635 struct packet_sock *po = pkt_sk(sk); 1636 struct packet_mclist *ml, *i; 1637 struct net_device *dev; 1638 int err; 1639 1640 rtnl_lock(); 1641 1642 err = -ENODEV; 1643 dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex); 1644 if (!dev) 1645 goto done; 1646 1647 err = -EINVAL; 1648 if (mreq->mr_alen > dev->addr_len) 1649 goto done; 1650 1651 err = -ENOBUFS; 1652 i = kmalloc(sizeof(*i), GFP_KERNEL); 1653 if (i == NULL) 1654 goto done; 1655 1656 err = 0; 1657 for (ml = po->mclist; ml; ml = ml->next) { 1658 if (ml->ifindex == mreq->mr_ifindex && 1659 ml->type == mreq->mr_type && 1660 ml->alen == mreq->mr_alen && 1661 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { 1662 ml->count++; 1663 /* Free the new element ... */ 1664 kfree(i); 1665 goto done; 1666 } 1667 } 1668 1669 i->type = mreq->mr_type; 1670 i->ifindex = mreq->mr_ifindex; 1671 i->alen = mreq->mr_alen; 1672 memcpy(i->addr, mreq->mr_address, i->alen); 1673 i->count = 1; 1674 i->next = po->mclist; 1675 po->mclist = i; 1676 err = packet_dev_mc(dev, i, 1); 1677 if (err) { 1678 po->mclist = i->next; 1679 kfree(i); 1680 } 1681 1682done: 1683 rtnl_unlock(); 1684 return err; 1685} 1686 1687static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) 1688{ 1689 struct packet_mclist *ml, **mlp; 1690 1691 rtnl_lock(); 1692 1693 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) { 1694 if (ml->ifindex == mreq->mr_ifindex && 1695 ml->type == mreq->mr_type && 1696 ml->alen == mreq->mr_alen && 1697 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { 1698 if (--ml->count == 0) { 1699 struct net_device *dev; 1700 *mlp = ml->next; 1701 dev = dev_get_by_index(sock_net(sk), ml->ifindex); 1702 if (dev) { 1703 packet_dev_mc(dev, ml, -1); 1704 dev_put(dev); 1705 } 1706 kfree(ml); 1707 } 1708 rtnl_unlock(); 1709 return 0; 1710 } 1711 } 1712 rtnl_unlock(); 1713 return -EADDRNOTAVAIL; 1714} 1715 1716static void packet_flush_mclist(struct sock *sk) 1717{ 1718 struct packet_sock *po = pkt_sk(sk); 1719 struct packet_mclist *ml; 1720 1721 if (!po->mclist) 1722 return; 1723 1724 rtnl_lock(); 1725 while ((ml = po->mclist) != NULL) { 1726 struct net_device *dev; 1727 1728 po->mclist = ml->next; 1729 dev = dev_get_by_index(sock_net(sk), ml->ifindex); 1730 if (dev != NULL) { 1731 packet_dev_mc(dev, ml, -1); 1732 dev_put(dev); 1733 } 1734 kfree(ml); 1735 } 1736 rtnl_unlock(); 1737} 1738 1739static int 1740packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) 1741{ 1742 struct sock *sk = sock->sk; 1743 struct packet_sock *po = pkt_sk(sk); 1744 int ret; 1745 1746 if (level != SOL_PACKET) 1747 return -ENOPROTOOPT; 1748 1749 switch (optname) { 1750 case PACKET_ADD_MEMBERSHIP: 1751 case PACKET_DROP_MEMBERSHIP: 1752 { 1753 struct packet_mreq_max mreq; 1754 int len = optlen; 1755 memset(&mreq, 0, sizeof(mreq)); 1756 if (len < sizeof(struct packet_mreq)) 1757 return -EINVAL; 1758 if (len > sizeof(mreq)) 1759 len = sizeof(mreq); 1760 if (copy_from_user(&mreq, optval, len)) 1761 return -EFAULT; 1762 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) 1763 return -EINVAL; 1764 if (optname == PACKET_ADD_MEMBERSHIP) 1765 ret = packet_mc_add(sk, &mreq); 1766 else 1767 ret = packet_mc_drop(sk, &mreq); 1768 return ret; 1769 } 1770 1771#ifdef CONFIG_PACKET_MMAP 1772 case PACKET_RX_RING: 1773 case PACKET_TX_RING: 1774 { 1775 struct tpacket_req req; 1776 1777 if (optlen < sizeof(req)) 1778 return -EINVAL; 1779 
if (copy_from_user(&req, optval, sizeof(req))) 1780 return -EFAULT; 1781 return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); 1782 } 1783 case PACKET_COPY_THRESH: 1784 { 1785 int val; 1786 1787 if (optlen != sizeof(val)) 1788 return -EINVAL; 1789 if (copy_from_user(&val, optval, sizeof(val))) 1790 return -EFAULT; 1791 1792 pkt_sk(sk)->copy_thresh = val; 1793 return 0; 1794 } 1795 case PACKET_VERSION: 1796 { 1797 int val; 1798 1799 if (optlen != sizeof(val)) 1800 return -EINVAL; 1801 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) 1802 return -EBUSY; 1803 if (copy_from_user(&val, optval, sizeof(val))) 1804 return -EFAULT; 1805 switch (val) { 1806 case TPACKET_V1: 1807 case TPACKET_V2: 1808 po->tp_version = val; 1809 return 0; 1810 default: 1811 return -EINVAL; 1812 } 1813 } 1814 case PACKET_RESERVE: 1815 { 1816 unsigned int val; 1817 1818 if (optlen != sizeof(val)) 1819 return -EINVAL; 1820 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) 1821 return -EBUSY; 1822 if (copy_from_user(&val, optval, sizeof(val))) 1823 return -EFAULT; 1824 po->tp_reserve = val; 1825 return 0; 1826 } 1827 case PACKET_LOSS: 1828 { 1829 unsigned int val; 1830 1831 if (optlen != sizeof(val)) 1832 return -EINVAL; 1833 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) 1834 return -EBUSY; 1835 if (copy_from_user(&val, optval, sizeof(val))) 1836 return -EFAULT; 1837 po->tp_loss = !!val; 1838 return 0; 1839 } 1840#endif 1841 case PACKET_AUXDATA: 1842 { 1843 int val; 1844 1845 if (optlen < sizeof(val)) 1846 return -EINVAL; 1847 if (copy_from_user(&val, optval, sizeof(val))) 1848 return -EFAULT; 1849 1850 po->auxdata = !!val; 1851 return 0; 1852 } 1853 case PACKET_ORIGDEV: 1854 { 1855 int val; 1856 1857 if (optlen < sizeof(val)) 1858 return -EINVAL; 1859 if (copy_from_user(&val, optval, sizeof(val))) 1860 return -EFAULT; 1861 1862 po->origdev = !!val; 1863 return 0; 1864 } 1865 default: 1866 return -ENOPROTOOPT; 1867 } 1868} 1869 1870static int packet_getsockopt(struct socket *sock, int level, int optname, 1871 char __user *optval, int __user *optlen) 1872{ 1873 int len; 1874 int val; 1875 struct sock *sk = sock->sk; 1876 struct packet_sock *po = pkt_sk(sk); 1877 void *data; 1878 struct tpacket_stats st; 1879 1880 if (level != SOL_PACKET) 1881 return -ENOPROTOOPT; 1882 1883 if (get_user(len, optlen)) 1884 return -EFAULT; 1885 1886 if (len < 0) 1887 return -EINVAL; 1888 1889 switch (optname) { 1890 case PACKET_STATISTICS: 1891 if (len > sizeof(struct tpacket_stats)) 1892 len = sizeof(struct tpacket_stats); 1893 spin_lock_bh(&sk->sk_receive_queue.lock); 1894 st = po->stats; 1895 memset(&po->stats, 0, sizeof(st)); 1896 spin_unlock_bh(&sk->sk_receive_queue.lock); 1897 st.tp_packets += st.tp_drops; 1898 1899 data = &st; 1900 break; 1901 case PACKET_AUXDATA: 1902 if (len > sizeof(int)) 1903 len = sizeof(int); 1904 val = po->auxdata; 1905 1906 data = &val; 1907 break; 1908 case PACKET_ORIGDEV: 1909 if (len > sizeof(int)) 1910 len = sizeof(int); 1911 val = po->origdev; 1912 1913 data = &val; 1914 break; 1915#ifdef CONFIG_PACKET_MMAP 1916 case PACKET_VERSION: 1917 if (len > sizeof(int)) 1918 len = sizeof(int); 1919 val = po->tp_version; 1920 data = &val; 1921 break; 1922 case PACKET_HDRLEN: 1923 if (len > sizeof(int)) 1924 len = sizeof(int); 1925 if (copy_from_user(&val, optval, len)) 1926 return -EFAULT; 1927 switch (val) { 1928 case TPACKET_V1: 1929 val = sizeof(struct tpacket_hdr); 1930 break; 1931 case TPACKET_V2: 1932 val = sizeof(struct tpacket2_hdr); 1933 break; 1934 default: 1935 return -EINVAL; 1936 } 1937 data = &val; 
1938 break; 1939 case PACKET_RESERVE: 1940 if (len > sizeof(unsigned int)) 1941 len = sizeof(unsigned int); 1942 val = po->tp_reserve; 1943 data = &val; 1944 break; 1945 case PACKET_LOSS: 1946 if (len > sizeof(unsigned int)) 1947 len = sizeof(unsigned int); 1948 val = po->tp_loss; 1949 data = &val; 1950 break; 1951#endif 1952 default: 1953 return -ENOPROTOOPT; 1954 } 1955 1956 if (put_user(len, optlen)) 1957 return -EFAULT; 1958 if (copy_to_user(optval, data, len)) 1959 return -EFAULT; 1960 return 0; 1961} 1962 1963 1964static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data) 1965{ 1966 struct sock *sk; 1967 struct hlist_node *node; 1968 struct net_device *dev = data; 1969 struct net *net = dev_net(dev); 1970 1971 read_lock(&net->packet.sklist_lock); 1972 sk_for_each(sk, node, &net->packet.sklist) { 1973 struct packet_sock *po = pkt_sk(sk); 1974 1975 switch (msg) { 1976 case NETDEV_UNREGISTER: 1977 if (po->mclist) 1978 packet_dev_mclist(dev, po->mclist, -1); 1979 /* fallthrough */ 1980 1981 case NETDEV_DOWN: 1982 if (dev->ifindex == po->ifindex) { 1983 spin_lock(&po->bind_lock); 1984 if (po->running) { 1985 __dev_remove_pack(&po->prot_hook); 1986 __sock_put(sk); 1987 po->running = 0; 1988 sk->sk_err = ENETDOWN; 1989 if (!sock_flag(sk, SOCK_DEAD)) 1990 sk->sk_error_report(sk); 1991 } 1992 if (msg == NETDEV_UNREGISTER) { 1993 po->ifindex = -1; 1994 po->prot_hook.dev = NULL; 1995 } 1996 spin_unlock(&po->bind_lock); 1997 } 1998 break; 1999 case NETDEV_UP: 2000 spin_lock(&po->bind_lock); 2001 if (dev->ifindex == po->ifindex && po->num && 2002 !po->running) { 2003 dev_add_pack(&po->prot_hook); 2004 sock_hold(sk); 2005 po->running = 1; 2006 } 2007 spin_unlock(&po->bind_lock); 2008 break; 2009 } 2010 } 2011 read_unlock(&net->packet.sklist_lock); 2012 return NOTIFY_DONE; 2013} 2014 2015 2016static int packet_ioctl(struct socket *sock, unsigned int cmd, 2017 unsigned long arg) 2018{ 2019 struct sock *sk = sock->sk; 2020 2021 switch (cmd) { 2022 case SIOCOUTQ: 2023 { 2024 int amount = sk_wmem_alloc_get(sk); 2025 2026 return put_user(amount, (int __user *)arg); 2027 } 2028 case SIOCINQ: 2029 { 2030 struct sk_buff *skb; 2031 int amount = 0; 2032 2033 spin_lock_bh(&sk->sk_receive_queue.lock); 2034 skb = skb_peek(&sk->sk_receive_queue); 2035 if (skb) 2036 amount = skb->len; 2037 spin_unlock_bh(&sk->sk_receive_queue.lock); 2038 return put_user(amount, (int __user *)arg); 2039 } 2040 case SIOCGSTAMP: 2041 return sock_get_timestamp(sk, (struct timeval __user *)arg); 2042 case SIOCGSTAMPNS: 2043 return sock_get_timestampns(sk, (struct timespec __user *)arg); 2044 2045#ifdef CONFIG_INET 2046 case SIOCADDRT: 2047 case SIOCDELRT: 2048 case SIOCDARP: 2049 case SIOCGARP: 2050 case SIOCSARP: 2051 case SIOCGIFADDR: 2052 case SIOCSIFADDR: 2053 case SIOCGIFBRDADDR: 2054 case SIOCSIFBRDADDR: 2055 case SIOCGIFNETMASK: 2056 case SIOCSIFNETMASK: 2057 case SIOCGIFDSTADDR: 2058 case SIOCSIFDSTADDR: 2059 case SIOCSIFFLAGS: 2060 if (!net_eq(sock_net(sk), &init_net)) 2061 return -ENOIOCTLCMD; 2062 return inet_dgram_ops.ioctl(sock, cmd, arg); 2063#endif 2064 2065 default: 2066 return -ENOIOCTLCMD; 2067 } 2068 return 0; 2069} 2070 2071#ifndef CONFIG_PACKET_MMAP 2072#define packet_mmap sock_no_mmap 2073#define packet_poll datagram_poll 2074#else 2075 2076static unsigned int packet_poll(struct file *file, struct socket *sock, 2077 poll_table *wait) 2078{ 2079 struct sock *sk = sock->sk; 2080 struct packet_sock *po = pkt_sk(sk); 2081 unsigned int mask = datagram_poll(file, sock, wait); 2082 2083 
spin_lock_bh(&sk->sk_receive_queue.lock);
2084 if (po->rx_ring.pg_vec) {
2085 if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
2086 mask |= POLLIN | POLLRDNORM;
2087 }
2088 spin_unlock_bh(&sk->sk_receive_queue.lock);
2089 spin_lock_bh(&sk->sk_write_queue.lock);
2090 if (po->tx_ring.pg_vec) {
2091 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
2092 mask |= POLLOUT | POLLWRNORM;
2093 }
2094 spin_unlock_bh(&sk->sk_write_queue.lock);
2095 return mask;
2096}
2097
2098
2099/* Dirty? Well, I still have not learned a better way to account
2100 * for user mmaps.
2101 */
2102
2103static void packet_mm_open(struct vm_area_struct *vma)
2104{
2105 struct file *file = vma->vm_file;
2106 struct socket *sock = file->private_data;
2107 struct sock *sk = sock->sk;
2108
2109 if (sk)
2110 atomic_inc(&pkt_sk(sk)->mapped);
2111}
2112
2113static void packet_mm_close(struct vm_area_struct *vma)
2114{
2115 struct file *file = vma->vm_file;
2116 struct socket *sock = file->private_data;
2117 struct sock *sk = sock->sk;
2118
2119 if (sk)
2120 atomic_dec(&pkt_sk(sk)->mapped);
2121}
2122
2123static const struct vm_operations_struct packet_mmap_ops = {
2124 .open = packet_mm_open,
2125 .close = packet_mm_close,
2126};
2127
2128static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
2129{
2130 int i;
2131
2132 for (i = 0; i < len; i++) {
2133 if (likely(pg_vec[i]))
2134 free_pages((unsigned long) pg_vec[i], order);
2135 }
2136 kfree(pg_vec);
2137}
2138
2139static inline char *alloc_one_pg_vec_page(unsigned long order)
2140{
2141 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
2142
2143 return (char *) __get_free_pages(gfp_flags, order);
2144}
2145
2146static char **alloc_pg_vec(struct tpacket_req *req, int order)
2147{
2148 unsigned int block_nr = req->tp_block_nr;
2149 char **pg_vec;
2150 int i;
2151
2152 pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
2153 if (unlikely(!pg_vec))
2154 goto out;
2155
2156 for (i = 0; i < block_nr; i++) {
2157 pg_vec[i] = alloc_one_pg_vec_page(order);
2158 if (unlikely(!pg_vec[i]))
2159 goto out_free_pgvec;
2160 }
2161
2162out:
2163 return pg_vec;
2164
2165out_free_pgvec:
2166 free_pg_vec(pg_vec, order, block_nr);
2167 pg_vec = NULL;
2168 goto out;
2169}
2170
2171static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2172 int closing, int tx_ring)
2173{
2174 char **pg_vec = NULL;
2175 struct packet_sock *po = pkt_sk(sk);
2176 int was_running, order = 0;
2177 struct packet_ring_buffer *rb;
2178 struct sk_buff_head *rb_queue;
2179 __be16 num;
2180 int err;
2181
2182 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
2183 rb_queue = tx_ring ?
&sk->sk_write_queue : &sk->sk_receive_queue; 2184 2185 err = -EBUSY; 2186 if (!closing) { 2187 if (atomic_read(&po->mapped)) 2188 goto out; 2189 if (atomic_read(&rb->pending)) 2190 goto out; 2191 } 2192 2193 if (req->tp_block_nr) { 2194 /* Sanity tests and some calculations */ 2195 err = -EBUSY; 2196 if (unlikely(rb->pg_vec)) 2197 goto out; 2198 2199 switch (po->tp_version) { 2200 case TPACKET_V1: 2201 po->tp_hdrlen = TPACKET_HDRLEN; 2202 break; 2203 case TPACKET_V2: 2204 po->tp_hdrlen = TPACKET2_HDRLEN; 2205 break; 2206 } 2207 2208 err = -EINVAL; 2209 if (unlikely((int)req->tp_block_size <= 0)) 2210 goto out; 2211 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) 2212 goto out; 2213 if (unlikely(req->tp_frame_size < po->tp_hdrlen + 2214 po->tp_reserve)) 2215 goto out; 2216 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) 2217 goto out; 2218 2219 rb->frames_per_block = req->tp_block_size/req->tp_frame_size; 2220 if (unlikely(rb->frames_per_block <= 0)) 2221 goto out; 2222 if (unlikely((rb->frames_per_block * req->tp_block_nr) != 2223 req->tp_frame_nr)) 2224 goto out; 2225 2226 err = -ENOMEM; 2227 order = get_order(req->tp_block_size); 2228 pg_vec = alloc_pg_vec(req, order); 2229 if (unlikely(!pg_vec)) 2230 goto out; 2231 } 2232 /* Done */ 2233 else { 2234 err = -EINVAL; 2235 if (unlikely(req->tp_frame_nr)) 2236 goto out; 2237 } 2238 2239 lock_sock(sk); 2240 2241 /* Detach socket from network */ 2242 spin_lock(&po->bind_lock); 2243 was_running = po->running; 2244 num = po->num; 2245 if (was_running) { 2246 __dev_remove_pack(&po->prot_hook); 2247 po->num = 0; 2248 po->running = 0; 2249 __sock_put(sk); 2250 } 2251 spin_unlock(&po->bind_lock); 2252 2253 synchronize_net(); 2254 2255 err = -EBUSY; 2256 mutex_lock(&po->pg_vec_lock); 2257 if (closing || atomic_read(&po->mapped) == 0) { 2258 err = 0; 2259#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; }) 2260 spin_lock_bh(&rb_queue->lock); 2261 pg_vec = XC(rb->pg_vec, pg_vec); 2262 rb->frame_max = (req->tp_frame_nr - 1); 2263 rb->head = 0; 2264 rb->frame_size = req->tp_frame_size; 2265 spin_unlock_bh(&rb_queue->lock); 2266 2267 order = XC(rb->pg_vec_order, order); 2268 req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr); 2269 2270 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE; 2271 po->prot_hook.func = (po->rx_ring.pg_vec) ? 
static int packet_mmap(struct file *file, struct socket *sock,
		struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	mutex_lock(&po->pg_vec_lock);

	expected_size = 0;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec) {
			expected_size += rb->pg_vec_len
						* rb->pg_vec_pages
						* PAGE_SIZE;
		}
	}

	if (expected_size == 0)
		goto out;

	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			struct page *page = virt_to_page(rb->pg_vec[i]);
			int pg_num;

			for (pg_num = 0; pg_num < rb->pg_vec_pages;
					pg_num++, page++) {
				err = vm_insert_page(vma, start, page);
				if (unlikely(err))
					goto out;
				start += PAGE_SIZE;
			}
		}
	}

	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
#endif
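
/*
 * Likewise purely illustrative: userspace maps the rings with a single
 * mmap() at offset 0 whose length must equal the expected_size summed
 * in packet_mmap() above (rx ring pages first, then tx), and can then
 * sleep in poll() until packet_poll() flags a frame.  Continuing the
 * hypothetical rx-only setup from the sketch above:
 *
 *	size_t len = req.tp_block_size * req.tp_block_nr;
 *	void *ring = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	poll(&pfd, 1, -1);	// POLLIN once a ring frame's tp_status
 *				// is no longer TP_STATUS_KERNEL
 */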

static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};

static struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};

#ifdef CONFIG_PROC_FS
static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &net->packet.sklist) {
		if (!off--)
			return s;
	}
	return NULL;
}

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(seq_file_net(seq)->packet.sklist_lock)
{
	struct net *net = seq_file_net(seq);
	read_lock(&net->packet.sklist_lock);
	return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	++*pos;
	return  (v == SEQ_START_TOKEN)
		? sk_head(&net->packet.sklist)
		: sk_next((struct sock *)v);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(seq_file_net(seq)->packet.sklist_lock)
{
	struct net *net = seq_file_net(seq);
	read_unlock(&net->packet.sklist_lock);
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = v;
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s));
	}

	return 0;
}

static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static int packet_net_init(struct net *net)
{
	rwlock_init(&net->packet.sklist_lock);
	INIT_HLIST_HEAD(&net->packet.sklist);

	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
		return -ENOMEM;

	return 0;
}

static void packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}

static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};


static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_pernet_subsys(&packet_net_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);