sock.c revision 8feaf0c0a5488b3d898a9c207eb6678f44ba3f26
1/* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * Generic socket support routines. Memory allocators, socket lock/release 7 * handler for protocols to use and generic option handler. 8 * 9 * 10 * Version: $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $ 11 * 12 * Authors: Ross Biro 13 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 14 * Florian La Roche, <flla@stud.uni-sb.de> 15 * Alan Cox, <A.Cox@swansea.ac.uk> 16 * 17 * Fixes: 18 * Alan Cox : Numerous verify_area() problems 19 * Alan Cox : Connecting on a connecting socket 20 * now returns an error for tcp. 21 * Alan Cox : sock->protocol is set correctly. 22 * and is not sometimes left as 0. 23 * Alan Cox : connect handles icmp errors on a 24 * connect properly. Unfortunately there 25 * is a restart syscall nasty there. I 26 * can't match BSD without hacking the C 27 * library. Ideas urgently sought! 28 * Alan Cox : Disallow bind() to addresses that are 29 * not ours - especially broadcast ones!! 30 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost) 31 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets, 32 * instead they leave that for the DESTROY timer. 33 * Alan Cox : Clean up error flag in accept 34 * Alan Cox : TCP ack handling is buggy, the DESTROY timer 35 * was buggy. Put a remove_sock() in the handler 36 * for memory when we hit 0. Also altered the timer 37 * code. The ACK stuff can wait and needs major 38 * TCP layer surgery. 39 * Alan Cox : Fixed TCP ack bug, removed remove sock 40 * and fixed timer/inet_bh race. 41 * Alan Cox : Added zapped flag for TCP 42 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code 43 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb 44 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources 45 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing. 46 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so... 47 * Rick Sladkey : Relaxed UDP rules for matching packets. 48 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support 49 * Pauline Middelink : identd support 50 * Alan Cox : Fixed connect() taking signals I think. 51 * Alan Cox : SO_LINGER supported 52 * Alan Cox : Error reporting fixes 53 * Anonymous : inet_create tidied up (sk->reuse setting) 54 * Alan Cox : inet sockets don't set sk->type! 55 * Alan Cox : Split socket option code 56 * Alan Cox : Callbacks 57 * Alan Cox : Nagle flag for Charles & Johannes stuff 58 * Alex : Removed restriction on inet fioctl 59 * Alan Cox : Splitting INET from NET core 60 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt() 61 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code 62 * Alan Cox : Split IP from generic code 63 * Alan Cox : New kfree_skbmem() 64 * Alan Cox : Make SO_DEBUG superuser only. 65 * Alan Cox : Allow anyone to clear SO_DEBUG 66 * (compatibility fix) 67 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput. 68 * Alan Cox : Allocator for a socket is settable. 69 * Alan Cox : SO_ERROR includes soft errors. 70 * Alan Cox : Allow NULL arguments on some SO_ opts 71 * Alan Cox : Generic socket allocation to make hooks 72 * easier (suggested by Craig Metz). 73 * Michael Pall : SO_ERROR returns positive errno again 74 * Steve Whitehouse: Added default destructor to free 75 * protocol private data. 76 * Steve Whitehouse: Added various other default routines 77 * common to several socket families. 78 * Chris Evans : Call suser() check last on F_SETOWN 79 * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER. 80 * Andi Kleen : Add sock_kmalloc()/sock_kfree_s() 81 * Andi Kleen : Fix write_space callback 82 * Chris Evans : Security fixes - signedness again 83 * Arnaldo C. Melo : cleanups, use skb_queue_purge 84 * 85 * To Fix: 86 * 87 * 88 * This program is free software; you can redistribute it and/or 89 * modify it under the terms of the GNU General Public License 90 * as published by the Free Software Foundation; either version 91 * 2 of the License, or (at your option) any later version. 92 */ 93 94#include <linux/config.h> 95#include <linux/errno.h> 96#include <linux/types.h> 97#include <linux/socket.h> 98#include <linux/in.h> 99#include <linux/kernel.h> 100#include <linux/module.h> 101#include <linux/proc_fs.h> 102#include <linux/seq_file.h> 103#include <linux/sched.h> 104#include <linux/timer.h> 105#include <linux/string.h> 106#include <linux/sockios.h> 107#include <linux/net.h> 108#include <linux/mm.h> 109#include <linux/slab.h> 110#include <linux/interrupt.h> 111#include <linux/poll.h> 112#include <linux/tcp.h> 113#include <linux/init.h> 114 115#include <asm/uaccess.h> 116#include <asm/system.h> 117 118#include <linux/netdevice.h> 119#include <net/protocol.h> 120#include <linux/skbuff.h> 121#include <net/request_sock.h> 122#include <net/sock.h> 123#include <net/xfrm.h> 124#include <linux/ipsec.h> 125 126#include <linux/filter.h> 127 128#ifdef CONFIG_INET 129#include <net/tcp.h> 130#endif 131 132/* Take into consideration the size of the struct sk_buff overhead in the 133 * determination of these values, since that is non-constant across 134 * platforms. This makes socket queueing behavior and performance 135 * not depend upon such differences. 136 */ 137#define _SK_MEM_PACKETS 256 138#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256) 139#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 140#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 141 142/* Run time adjustable parameters. */ 143__u32 sysctl_wmem_max = SK_WMEM_MAX; 144__u32 sysctl_rmem_max = SK_RMEM_MAX; 145__u32 sysctl_wmem_default = SK_WMEM_MAX; 146__u32 sysctl_rmem_default = SK_RMEM_MAX; 147 148/* Maximal space eaten by iovec or ancilliary data plus some space */ 149int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512); 150 151static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) 152{ 153 struct timeval tv; 154 155 if (optlen < sizeof(tv)) 156 return -EINVAL; 157 if (copy_from_user(&tv, optval, sizeof(tv))) 158 return -EFAULT; 159 160 *timeo_p = MAX_SCHEDULE_TIMEOUT; 161 if (tv.tv_sec == 0 && tv.tv_usec == 0) 162 return 0; 163 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1)) 164 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ); 165 return 0; 166} 167 168static void sock_warn_obsolete_bsdism(const char *name) 169{ 170 static int warned; 171 static char warncomm[TASK_COMM_LEN]; 172 if (strcmp(warncomm, current->comm) && warned < 5) { 173 strcpy(warncomm, current->comm); 174 printk(KERN_WARNING "process `%s' is using obsolete " 175 "%s SO_BSDCOMPAT\n", warncomm, name); 176 warned++; 177 } 178} 179 180static void sock_disable_timestamp(struct sock *sk) 181{ 182 if (sock_flag(sk, SOCK_TIMESTAMP)) { 183 sock_reset_flag(sk, SOCK_TIMESTAMP); 184 net_disable_timestamp(); 185 } 186} 187 188 189/* 190 * This is meant for all protocols to use and covers goings on 191 * at the socket level. Everything here is generic. 192 */ 193 194int sock_setsockopt(struct socket *sock, int level, int optname, 195 char __user *optval, int optlen) 196{ 197 struct sock *sk=sock->sk; 198 struct sk_filter *filter; 199 int val; 200 int valbool; 201 struct linger ling; 202 int ret = 0; 203 204 /* 205 * Options without arguments 206 */ 207 208#ifdef SO_DONTLINGER /* Compatibility item... */ 209 if (optname == SO_DONTLINGER) { 210 lock_sock(sk); 211 sock_reset_flag(sk, SOCK_LINGER); 212 release_sock(sk); 213 return 0; 214 } 215#endif 216 217 if(optlen<sizeof(int)) 218 return(-EINVAL); 219 220 if (get_user(val, (int __user *)optval)) 221 return -EFAULT; 222 223 valbool = val?1:0; 224 225 lock_sock(sk); 226 227 switch(optname) 228 { 229 case SO_DEBUG: 230 if(val && !capable(CAP_NET_ADMIN)) 231 { 232 ret = -EACCES; 233 } 234 else if (valbool) 235 sock_set_flag(sk, SOCK_DBG); 236 else 237 sock_reset_flag(sk, SOCK_DBG); 238 break; 239 case SO_REUSEADDR: 240 sk->sk_reuse = valbool; 241 break; 242 case SO_TYPE: 243 case SO_ERROR: 244 ret = -ENOPROTOOPT; 245 break; 246 case SO_DONTROUTE: 247 if (valbool) 248 sock_set_flag(sk, SOCK_LOCALROUTE); 249 else 250 sock_reset_flag(sk, SOCK_LOCALROUTE); 251 break; 252 case SO_BROADCAST: 253 sock_valbool_flag(sk, SOCK_BROADCAST, valbool); 254 break; 255 case SO_SNDBUF: 256 /* Don't error on this BSD doesn't and if you think 257 about it this is right. Otherwise apps have to 258 play 'guess the biggest size' games. RCVBUF/SNDBUF 259 are treated in BSD as hints */ 260 261 if (val > sysctl_wmem_max) 262 val = sysctl_wmem_max; 263set_sndbuf: 264 sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 265 if ((val * 2) < SOCK_MIN_SNDBUF) 266 sk->sk_sndbuf = SOCK_MIN_SNDBUF; 267 else 268 sk->sk_sndbuf = val * 2; 269 270 /* 271 * Wake up sending tasks if we 272 * upped the value. 273 */ 274 sk->sk_write_space(sk); 275 break; 276 277 case SO_SNDBUFFORCE: 278 if (!capable(CAP_NET_ADMIN)) { 279 ret = -EPERM; 280 break; 281 } 282 goto set_sndbuf; 283 284 case SO_RCVBUF: 285 /* Don't error on this BSD doesn't and if you think 286 about it this is right. Otherwise apps have to 287 play 'guess the biggest size' games. RCVBUF/SNDBUF 288 are treated in BSD as hints */ 289 290 if (val > sysctl_rmem_max) 291 val = sysctl_rmem_max; 292set_rcvbuf: 293 sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 294 /* FIXME: is this lower bound the right one? */ 295 if ((val * 2) < SOCK_MIN_RCVBUF) 296 sk->sk_rcvbuf = SOCK_MIN_RCVBUF; 297 else 298 sk->sk_rcvbuf = val * 2; 299 break; 300 301 case SO_RCVBUFFORCE: 302 if (!capable(CAP_NET_ADMIN)) { 303 ret = -EPERM; 304 break; 305 } 306 goto set_rcvbuf; 307 308 case SO_KEEPALIVE: 309#ifdef CONFIG_INET 310 if (sk->sk_protocol == IPPROTO_TCP) 311 tcp_set_keepalive(sk, valbool); 312#endif 313 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); 314 break; 315 316 case SO_OOBINLINE: 317 sock_valbool_flag(sk, SOCK_URGINLINE, valbool); 318 break; 319 320 case SO_NO_CHECK: 321 sk->sk_no_check = valbool; 322 break; 323 324 case SO_PRIORITY: 325 if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN)) 326 sk->sk_priority = val; 327 else 328 ret = -EPERM; 329 break; 330 331 case SO_LINGER: 332 if(optlen<sizeof(ling)) { 333 ret = -EINVAL; /* 1003.1g */ 334 break; 335 } 336 if (copy_from_user(&ling,optval,sizeof(ling))) { 337 ret = -EFAULT; 338 break; 339 } 340 if (!ling.l_onoff) 341 sock_reset_flag(sk, SOCK_LINGER); 342 else { 343#if (BITS_PER_LONG == 32) 344 if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ) 345 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT; 346 else 347#endif 348 sk->sk_lingertime = ling.l_linger * HZ; 349 sock_set_flag(sk, SOCK_LINGER); 350 } 351 break; 352 353 case SO_BSDCOMPAT: 354 sock_warn_obsolete_bsdism("setsockopt"); 355 break; 356 357 case SO_PASSCRED: 358 if (valbool) 359 set_bit(SOCK_PASSCRED, &sock->flags); 360 else 361 clear_bit(SOCK_PASSCRED, &sock->flags); 362 break; 363 364 case SO_TIMESTAMP: 365 if (valbool) { 366 sock_set_flag(sk, SOCK_RCVTSTAMP); 367 sock_enable_timestamp(sk); 368 } else 369 sock_reset_flag(sk, SOCK_RCVTSTAMP); 370 break; 371 372 case SO_RCVLOWAT: 373 if (val < 0) 374 val = INT_MAX; 375 sk->sk_rcvlowat = val ? : 1; 376 break; 377 378 case SO_RCVTIMEO: 379 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen); 380 break; 381 382 case SO_SNDTIMEO: 383 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen); 384 break; 385 386#ifdef CONFIG_NETDEVICES 387 case SO_BINDTODEVICE: 388 { 389 char devname[IFNAMSIZ]; 390 391 /* Sorry... */ 392 if (!capable(CAP_NET_RAW)) { 393 ret = -EPERM; 394 break; 395 } 396 397 /* Bind this socket to a particular device like "eth0", 398 * as specified in the passed interface name. If the 399 * name is "" or the option length is zero the socket 400 * is not bound. 401 */ 402 403 if (!valbool) { 404 sk->sk_bound_dev_if = 0; 405 } else { 406 if (optlen > IFNAMSIZ) 407 optlen = IFNAMSIZ; 408 if (copy_from_user(devname, optval, optlen)) { 409 ret = -EFAULT; 410 break; 411 } 412 413 /* Remove any cached route for this socket. */ 414 sk_dst_reset(sk); 415 416 if (devname[0] == '\0') { 417 sk->sk_bound_dev_if = 0; 418 } else { 419 struct net_device *dev = dev_get_by_name(devname); 420 if (!dev) { 421 ret = -ENODEV; 422 break; 423 } 424 sk->sk_bound_dev_if = dev->ifindex; 425 dev_put(dev); 426 } 427 } 428 break; 429 } 430#endif 431 432 433 case SO_ATTACH_FILTER: 434 ret = -EINVAL; 435 if (optlen == sizeof(struct sock_fprog)) { 436 struct sock_fprog fprog; 437 438 ret = -EFAULT; 439 if (copy_from_user(&fprog, optval, sizeof(fprog))) 440 break; 441 442 ret = sk_attach_filter(&fprog, sk); 443 } 444 break; 445 446 case SO_DETACH_FILTER: 447 spin_lock_bh(&sk->sk_lock.slock); 448 filter = sk->sk_filter; 449 if (filter) { 450 sk->sk_filter = NULL; 451 spin_unlock_bh(&sk->sk_lock.slock); 452 sk_filter_release(sk, filter); 453 break; 454 } 455 spin_unlock_bh(&sk->sk_lock.slock); 456 ret = -ENONET; 457 break; 458 459 /* We implement the SO_SNDLOWAT etc to 460 not be settable (1003.1g 5.3) */ 461 default: 462 ret = -ENOPROTOOPT; 463 break; 464 } 465 release_sock(sk); 466 return ret; 467} 468 469 470int sock_getsockopt(struct socket *sock, int level, int optname, 471 char __user *optval, int __user *optlen) 472{ 473 struct sock *sk = sock->sk; 474 475 union 476 { 477 int val; 478 struct linger ling; 479 struct timeval tm; 480 } v; 481 482 unsigned int lv = sizeof(int); 483 int len; 484 485 if(get_user(len,optlen)) 486 return -EFAULT; 487 if(len < 0) 488 return -EINVAL; 489 490 switch(optname) 491 { 492 case SO_DEBUG: 493 v.val = sock_flag(sk, SOCK_DBG); 494 break; 495 496 case SO_DONTROUTE: 497 v.val = sock_flag(sk, SOCK_LOCALROUTE); 498 break; 499 500 case SO_BROADCAST: 501 v.val = !!sock_flag(sk, SOCK_BROADCAST); 502 break; 503 504 case SO_SNDBUF: 505 v.val = sk->sk_sndbuf; 506 break; 507 508 case SO_RCVBUF: 509 v.val = sk->sk_rcvbuf; 510 break; 511 512 case SO_REUSEADDR: 513 v.val = sk->sk_reuse; 514 break; 515 516 case SO_KEEPALIVE: 517 v.val = !!sock_flag(sk, SOCK_KEEPOPEN); 518 break; 519 520 case SO_TYPE: 521 v.val = sk->sk_type; 522 break; 523 524 case SO_ERROR: 525 v.val = -sock_error(sk); 526 if(v.val==0) 527 v.val = xchg(&sk->sk_err_soft, 0); 528 break; 529 530 case SO_OOBINLINE: 531 v.val = !!sock_flag(sk, SOCK_URGINLINE); 532 break; 533 534 case SO_NO_CHECK: 535 v.val = sk->sk_no_check; 536 break; 537 538 case SO_PRIORITY: 539 v.val = sk->sk_priority; 540 break; 541 542 case SO_LINGER: 543 lv = sizeof(v.ling); 544 v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER); 545 v.ling.l_linger = sk->sk_lingertime / HZ; 546 break; 547 548 case SO_BSDCOMPAT: 549 sock_warn_obsolete_bsdism("getsockopt"); 550 break; 551 552 case SO_TIMESTAMP: 553 v.val = sock_flag(sk, SOCK_RCVTSTAMP); 554 break; 555 556 case SO_RCVTIMEO: 557 lv=sizeof(struct timeval); 558 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) { 559 v.tm.tv_sec = 0; 560 v.tm.tv_usec = 0; 561 } else { 562 v.tm.tv_sec = sk->sk_rcvtimeo / HZ; 563 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ; 564 } 565 break; 566 567 case SO_SNDTIMEO: 568 lv=sizeof(struct timeval); 569 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) { 570 v.tm.tv_sec = 0; 571 v.tm.tv_usec = 0; 572 } else { 573 v.tm.tv_sec = sk->sk_sndtimeo / HZ; 574 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ; 575 } 576 break; 577 578 case SO_RCVLOWAT: 579 v.val = sk->sk_rcvlowat; 580 break; 581 582 case SO_SNDLOWAT: 583 v.val=1; 584 break; 585 586 case SO_PASSCRED: 587 v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0; 588 break; 589 590 case SO_PEERCRED: 591 if (len > sizeof(sk->sk_peercred)) 592 len = sizeof(sk->sk_peercred); 593 if (copy_to_user(optval, &sk->sk_peercred, len)) 594 return -EFAULT; 595 goto lenout; 596 597 case SO_PEERNAME: 598 { 599 char address[128]; 600 601 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2)) 602 return -ENOTCONN; 603 if (lv < len) 604 return -EINVAL; 605 if (copy_to_user(optval, address, len)) 606 return -EFAULT; 607 goto lenout; 608 } 609 610 /* Dubious BSD thing... Probably nobody even uses it, but 611 * the UNIX standard wants it for whatever reason... -DaveM 612 */ 613 case SO_ACCEPTCONN: 614 v.val = sk->sk_state == TCP_LISTEN; 615 break; 616 617 case SO_PEERSEC: 618 return security_socket_getpeersec(sock, optval, optlen, len); 619 620 default: 621 return(-ENOPROTOOPT); 622 } 623 if (len > lv) 624 len = lv; 625 if (copy_to_user(optval, &v, len)) 626 return -EFAULT; 627lenout: 628 if (put_user(len, optlen)) 629 return -EFAULT; 630 return 0; 631} 632 633/** 634 * sk_alloc - All socket objects are allocated here 635 * @family: protocol family 636 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) 637 * @prot: struct proto associated with this new sock instance 638 * @zero_it: if we should zero the newly allocated sock 639 */ 640struct sock *sk_alloc(int family, unsigned int __nocast priority, 641 struct proto *prot, int zero_it) 642{ 643 struct sock *sk = NULL; 644 kmem_cache_t *slab = prot->slab; 645 646 if (slab != NULL) 647 sk = kmem_cache_alloc(slab, priority); 648 else 649 sk = kmalloc(prot->obj_size, priority); 650 651 if (sk) { 652 if (zero_it) { 653 memset(sk, 0, prot->obj_size); 654 sk->sk_family = family; 655 /* 656 * See comment in struct sock definition to understand 657 * why we need sk_prot_creator -acme 658 */ 659 sk->sk_prot = sk->sk_prot_creator = prot; 660 sock_lock_init(sk); 661 } 662 663 if (security_sk_alloc(sk, family, priority)) { 664 if (slab != NULL) 665 kmem_cache_free(slab, sk); 666 else 667 kfree(sk); 668 sk = NULL; 669 } else 670 __module_get(prot->owner); 671 } 672 return sk; 673} 674 675void sk_free(struct sock *sk) 676{ 677 struct sk_filter *filter; 678 struct module *owner = sk->sk_prot_creator->owner; 679 680 if (sk->sk_destruct) 681 sk->sk_destruct(sk); 682 683 filter = sk->sk_filter; 684 if (filter) { 685 sk_filter_release(sk, filter); 686 sk->sk_filter = NULL; 687 } 688 689 sock_disable_timestamp(sk); 690 691 if (atomic_read(&sk->sk_omem_alloc)) 692 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", 693 __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); 694 695 security_sk_free(sk); 696 if (sk->sk_prot_creator->slab != NULL) 697 kmem_cache_free(sk->sk_prot_creator->slab, sk); 698 else 699 kfree(sk); 700 module_put(owner); 701} 702 703void __init sk_init(void) 704{ 705 if (num_physpages <= 4096) { 706 sysctl_wmem_max = 32767; 707 sysctl_rmem_max = 32767; 708 sysctl_wmem_default = 32767; 709 sysctl_rmem_default = 32767; 710 } else if (num_physpages >= 131072) { 711 sysctl_wmem_max = 131071; 712 sysctl_rmem_max = 131071; 713 } 714} 715 716/* 717 * Simple resource managers for sockets. 718 */ 719 720 721/* 722 * Write buffer destructor automatically called from kfree_skb. 723 */ 724void sock_wfree(struct sk_buff *skb) 725{ 726 struct sock *sk = skb->sk; 727 728 /* In case it might be waiting for more memory. */ 729 atomic_sub(skb->truesize, &sk->sk_wmem_alloc); 730 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) 731 sk->sk_write_space(sk); 732 sock_put(sk); 733} 734 735/* 736 * Read buffer destructor automatically called from kfree_skb. 737 */ 738void sock_rfree(struct sk_buff *skb) 739{ 740 struct sock *sk = skb->sk; 741 742 atomic_sub(skb->truesize, &sk->sk_rmem_alloc); 743} 744 745 746int sock_i_uid(struct sock *sk) 747{ 748 int uid; 749 750 read_lock(&sk->sk_callback_lock); 751 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0; 752 read_unlock(&sk->sk_callback_lock); 753 return uid; 754} 755 756unsigned long sock_i_ino(struct sock *sk) 757{ 758 unsigned long ino; 759 760 read_lock(&sk->sk_callback_lock); 761 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; 762 read_unlock(&sk->sk_callback_lock); 763 return ino; 764} 765 766/* 767 * Allocate a skb from the socket's send buffer. 768 */ 769struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, 770 unsigned int __nocast priority) 771{ 772 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { 773 struct sk_buff * skb = alloc_skb(size, priority); 774 if (skb) { 775 skb_set_owner_w(skb, sk); 776 return skb; 777 } 778 } 779 return NULL; 780} 781 782/* 783 * Allocate a skb from the socket's receive buffer. 784 */ 785struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, 786 unsigned int __nocast priority) 787{ 788 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { 789 struct sk_buff *skb = alloc_skb(size, priority); 790 if (skb) { 791 skb_set_owner_r(skb, sk); 792 return skb; 793 } 794 } 795 return NULL; 796} 797 798/* 799 * Allocate a memory block from the socket's option memory buffer. 800 */ 801void *sock_kmalloc(struct sock *sk, int size, unsigned int __nocast priority) 802{ 803 if ((unsigned)size <= sysctl_optmem_max && 804 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { 805 void *mem; 806 /* First do the add, to avoid the race if kmalloc 807 * might sleep. 808 */ 809 atomic_add(size, &sk->sk_omem_alloc); 810 mem = kmalloc(size, priority); 811 if (mem) 812 return mem; 813 atomic_sub(size, &sk->sk_omem_alloc); 814 } 815 return NULL; 816} 817 818/* 819 * Free an option memory block. 820 */ 821void sock_kfree_s(struct sock *sk, void *mem, int size) 822{ 823 kfree(mem); 824 atomic_sub(size, &sk->sk_omem_alloc); 825} 826 827/* It is almost wait_for_tcp_memory minus release_sock/lock_sock. 828 I think, these locks should be removed for datagram sockets. 829 */ 830static long sock_wait_for_wmem(struct sock * sk, long timeo) 831{ 832 DEFINE_WAIT(wait); 833 834 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 835 for (;;) { 836 if (!timeo) 837 break; 838 if (signal_pending(current)) 839 break; 840 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 841 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 842 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) 843 break; 844 if (sk->sk_shutdown & SEND_SHUTDOWN) 845 break; 846 if (sk->sk_err) 847 break; 848 timeo = schedule_timeout(timeo); 849 } 850 finish_wait(sk->sk_sleep, &wait); 851 return timeo; 852} 853 854 855/* 856 * Generic send/receive buffer handlers 857 */ 858 859static struct sk_buff *sock_alloc_send_pskb(struct sock *sk, 860 unsigned long header_len, 861 unsigned long data_len, 862 int noblock, int *errcode) 863{ 864 struct sk_buff *skb; 865 unsigned int gfp_mask; 866 long timeo; 867 int err; 868 869 gfp_mask = sk->sk_allocation; 870 if (gfp_mask & __GFP_WAIT) 871 gfp_mask |= __GFP_REPEAT; 872 873 timeo = sock_sndtimeo(sk, noblock); 874 while (1) { 875 err = sock_error(sk); 876 if (err != 0) 877 goto failure; 878 879 err = -EPIPE; 880 if (sk->sk_shutdown & SEND_SHUTDOWN) 881 goto failure; 882 883 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { 884 skb = alloc_skb(header_len, sk->sk_allocation); 885 if (skb) { 886 int npages; 887 int i; 888 889 /* No pages, we're done... */ 890 if (!data_len) 891 break; 892 893 npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; 894 skb->truesize += data_len; 895 skb_shinfo(skb)->nr_frags = npages; 896 for (i = 0; i < npages; i++) { 897 struct page *page; 898 skb_frag_t *frag; 899 900 page = alloc_pages(sk->sk_allocation, 0); 901 if (!page) { 902 err = -ENOBUFS; 903 skb_shinfo(skb)->nr_frags = i; 904 kfree_skb(skb); 905 goto failure; 906 } 907 908 frag = &skb_shinfo(skb)->frags[i]; 909 frag->page = page; 910 frag->page_offset = 0; 911 frag->size = (data_len >= PAGE_SIZE ? 912 PAGE_SIZE : 913 data_len); 914 data_len -= PAGE_SIZE; 915 } 916 917 /* Full success... */ 918 break; 919 } 920 err = -ENOBUFS; 921 goto failure; 922 } 923 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 924 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 925 err = -EAGAIN; 926 if (!timeo) 927 goto failure; 928 if (signal_pending(current)) 929 goto interrupted; 930 timeo = sock_wait_for_wmem(sk, timeo); 931 } 932 933 skb_set_owner_w(skb, sk); 934 return skb; 935 936interrupted: 937 err = sock_intr_errno(timeo); 938failure: 939 *errcode = err; 940 return NULL; 941} 942 943struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, 944 int noblock, int *errcode) 945{ 946 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode); 947} 948 949static void __lock_sock(struct sock *sk) 950{ 951 DEFINE_WAIT(wait); 952 953 for(;;) { 954 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait, 955 TASK_UNINTERRUPTIBLE); 956 spin_unlock_bh(&sk->sk_lock.slock); 957 schedule(); 958 spin_lock_bh(&sk->sk_lock.slock); 959 if(!sock_owned_by_user(sk)) 960 break; 961 } 962 finish_wait(&sk->sk_lock.wq, &wait); 963} 964 965static void __release_sock(struct sock *sk) 966{ 967 struct sk_buff *skb = sk->sk_backlog.head; 968 969 do { 970 sk->sk_backlog.head = sk->sk_backlog.tail = NULL; 971 bh_unlock_sock(sk); 972 973 do { 974 struct sk_buff *next = skb->next; 975 976 skb->next = NULL; 977 sk->sk_backlog_rcv(sk, skb); 978 979 /* 980 * We are in process context here with softirqs 981 * disabled, use cond_resched_softirq() to preempt. 982 * This is safe to do because we've taken the backlog 983 * queue private: 984 */ 985 cond_resched_softirq(); 986 987 skb = next; 988 } while (skb != NULL); 989 990 bh_lock_sock(sk); 991 } while((skb = sk->sk_backlog.head) != NULL); 992} 993 994/** 995 * sk_wait_data - wait for data to arrive at sk_receive_queue 996 * @sk: sock to wait on 997 * @timeo: for how long 998 * 999 * Now socket state including sk->sk_err is changed only under lock, 1000 * hence we may omit checks after joining wait queue. 1001 * We check receive queue before schedule() only as optimization; 1002 * it is very likely that release_sock() added new data. 1003 */ 1004int sk_wait_data(struct sock *sk, long *timeo) 1005{ 1006 int rc; 1007 DEFINE_WAIT(wait); 1008 1009 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 1010 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1011 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); 1012 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1013 finish_wait(sk->sk_sleep, &wait); 1014 return rc; 1015} 1016 1017EXPORT_SYMBOL(sk_wait_data); 1018 1019/* 1020 * Set of default routines for initialising struct proto_ops when 1021 * the protocol does not support a particular function. In certain 1022 * cases where it makes no sense for a protocol to have a "do nothing" 1023 * function, some default processing is provided. 1024 */ 1025 1026int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len) 1027{ 1028 return -EOPNOTSUPP; 1029} 1030 1031int sock_no_connect(struct socket *sock, struct sockaddr *saddr, 1032 int len, int flags) 1033{ 1034 return -EOPNOTSUPP; 1035} 1036 1037int sock_no_socketpair(struct socket *sock1, struct socket *sock2) 1038{ 1039 return -EOPNOTSUPP; 1040} 1041 1042int sock_no_accept(struct socket *sock, struct socket *newsock, int flags) 1043{ 1044 return -EOPNOTSUPP; 1045} 1046 1047int sock_no_getname(struct socket *sock, struct sockaddr *saddr, 1048 int *len, int peer) 1049{ 1050 return -EOPNOTSUPP; 1051} 1052 1053unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt) 1054{ 1055 return 0; 1056} 1057 1058int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 1059{ 1060 return -EOPNOTSUPP; 1061} 1062 1063int sock_no_listen(struct socket *sock, int backlog) 1064{ 1065 return -EOPNOTSUPP; 1066} 1067 1068int sock_no_shutdown(struct socket *sock, int how) 1069{ 1070 return -EOPNOTSUPP; 1071} 1072 1073int sock_no_setsockopt(struct socket *sock, int level, int optname, 1074 char __user *optval, int optlen) 1075{ 1076 return -EOPNOTSUPP; 1077} 1078 1079int sock_no_getsockopt(struct socket *sock, int level, int optname, 1080 char __user *optval, int __user *optlen) 1081{ 1082 return -EOPNOTSUPP; 1083} 1084 1085int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, 1086 size_t len) 1087{ 1088 return -EOPNOTSUPP; 1089} 1090 1091int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, 1092 size_t len, int flags) 1093{ 1094 return -EOPNOTSUPP; 1095} 1096 1097int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) 1098{ 1099 /* Mirror missing mmap method error code */ 1100 return -ENODEV; 1101} 1102 1103ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) 1104{ 1105 ssize_t res; 1106 struct msghdr msg = {.msg_flags = flags}; 1107 struct kvec iov; 1108 char *kaddr = kmap(page); 1109 iov.iov_base = kaddr + offset; 1110 iov.iov_len = size; 1111 res = kernel_sendmsg(sock, &msg, &iov, 1, size); 1112 kunmap(page); 1113 return res; 1114} 1115 1116/* 1117 * Default Socket Callbacks 1118 */ 1119 1120static void sock_def_wakeup(struct sock *sk) 1121{ 1122 read_lock(&sk->sk_callback_lock); 1123 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1124 wake_up_interruptible_all(sk->sk_sleep); 1125 read_unlock(&sk->sk_callback_lock); 1126} 1127 1128static void sock_def_error_report(struct sock *sk) 1129{ 1130 read_lock(&sk->sk_callback_lock); 1131 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1132 wake_up_interruptible(sk->sk_sleep); 1133 sk_wake_async(sk,0,POLL_ERR); 1134 read_unlock(&sk->sk_callback_lock); 1135} 1136 1137static void sock_def_readable(struct sock *sk, int len) 1138{ 1139 read_lock(&sk->sk_callback_lock); 1140 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1141 wake_up_interruptible(sk->sk_sleep); 1142 sk_wake_async(sk,1,POLL_IN); 1143 read_unlock(&sk->sk_callback_lock); 1144} 1145 1146static void sock_def_write_space(struct sock *sk) 1147{ 1148 read_lock(&sk->sk_callback_lock); 1149 1150 /* Do not wake up a writer until he can make "significant" 1151 * progress. --DaveM 1152 */ 1153 if((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { 1154 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1155 wake_up_interruptible(sk->sk_sleep); 1156 1157 /* Should agree with poll, otherwise some programs break */ 1158 if (sock_writeable(sk)) 1159 sk_wake_async(sk, 2, POLL_OUT); 1160 } 1161 1162 read_unlock(&sk->sk_callback_lock); 1163} 1164 1165static void sock_def_destruct(struct sock *sk) 1166{ 1167 if (sk->sk_protinfo) 1168 kfree(sk->sk_protinfo); 1169} 1170 1171void sk_send_sigurg(struct sock *sk) 1172{ 1173 if (sk->sk_socket && sk->sk_socket->file) 1174 if (send_sigurg(&sk->sk_socket->file->f_owner)) 1175 sk_wake_async(sk, 3, POLL_PRI); 1176} 1177 1178void sk_reset_timer(struct sock *sk, struct timer_list* timer, 1179 unsigned long expires) 1180{ 1181 if (!mod_timer(timer, expires)) 1182 sock_hold(sk); 1183} 1184 1185EXPORT_SYMBOL(sk_reset_timer); 1186 1187void sk_stop_timer(struct sock *sk, struct timer_list* timer) 1188{ 1189 if (timer_pending(timer) && del_timer(timer)) 1190 __sock_put(sk); 1191} 1192 1193EXPORT_SYMBOL(sk_stop_timer); 1194 1195void sock_init_data(struct socket *sock, struct sock *sk) 1196{ 1197 skb_queue_head_init(&sk->sk_receive_queue); 1198 skb_queue_head_init(&sk->sk_write_queue); 1199 skb_queue_head_init(&sk->sk_error_queue); 1200 1201 sk->sk_send_head = NULL; 1202 1203 init_timer(&sk->sk_timer); 1204 1205 sk->sk_allocation = GFP_KERNEL; 1206 sk->sk_rcvbuf = sysctl_rmem_default; 1207 sk->sk_sndbuf = sysctl_wmem_default; 1208 sk->sk_state = TCP_CLOSE; 1209 sk->sk_socket = sock; 1210 1211 sock_set_flag(sk, SOCK_ZAPPED); 1212 1213 if(sock) 1214 { 1215 sk->sk_type = sock->type; 1216 sk->sk_sleep = &sock->wait; 1217 sock->sk = sk; 1218 } else 1219 sk->sk_sleep = NULL; 1220 1221 rwlock_init(&sk->sk_dst_lock); 1222 rwlock_init(&sk->sk_callback_lock); 1223 1224 sk->sk_state_change = sock_def_wakeup; 1225 sk->sk_data_ready = sock_def_readable; 1226 sk->sk_write_space = sock_def_write_space; 1227 sk->sk_error_report = sock_def_error_report; 1228 sk->sk_destruct = sock_def_destruct; 1229 1230 sk->sk_sndmsg_page = NULL; 1231 sk->sk_sndmsg_off = 0; 1232 1233 sk->sk_peercred.pid = 0; 1234 sk->sk_peercred.uid = -1; 1235 sk->sk_peercred.gid = -1; 1236 sk->sk_write_pending = 0; 1237 sk->sk_rcvlowat = 1; 1238 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 1239 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; 1240 1241 sk->sk_stamp.tv_sec = -1L; 1242 sk->sk_stamp.tv_usec = -1L; 1243 1244 atomic_set(&sk->sk_refcnt, 1); 1245} 1246 1247void fastcall lock_sock(struct sock *sk) 1248{ 1249 might_sleep(); 1250 spin_lock_bh(&(sk->sk_lock.slock)); 1251 if (sk->sk_lock.owner) 1252 __lock_sock(sk); 1253 sk->sk_lock.owner = (void *)1; 1254 spin_unlock_bh(&(sk->sk_lock.slock)); 1255} 1256 1257EXPORT_SYMBOL(lock_sock); 1258 1259void fastcall release_sock(struct sock *sk) 1260{ 1261 spin_lock_bh(&(sk->sk_lock.slock)); 1262 if (sk->sk_backlog.tail) 1263 __release_sock(sk); 1264 sk->sk_lock.owner = NULL; 1265 if (waitqueue_active(&(sk->sk_lock.wq))) 1266 wake_up(&(sk->sk_lock.wq)); 1267 spin_unlock_bh(&(sk->sk_lock.slock)); 1268} 1269EXPORT_SYMBOL(release_sock); 1270 1271int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) 1272{ 1273 if (!sock_flag(sk, SOCK_TIMESTAMP)) 1274 sock_enable_timestamp(sk); 1275 if (sk->sk_stamp.tv_sec == -1) 1276 return -ENOENT; 1277 if (sk->sk_stamp.tv_sec == 0) 1278 do_gettimeofday(&sk->sk_stamp); 1279 return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ? 1280 -EFAULT : 0; 1281} 1282EXPORT_SYMBOL(sock_get_timestamp); 1283 1284void sock_enable_timestamp(struct sock *sk) 1285{ 1286 if (!sock_flag(sk, SOCK_TIMESTAMP)) { 1287 sock_set_flag(sk, SOCK_TIMESTAMP); 1288 net_enable_timestamp(); 1289 } 1290} 1291EXPORT_SYMBOL(sock_enable_timestamp); 1292 1293/* 1294 * Get a socket option on an socket. 1295 * 1296 * FIX: POSIX 1003.1g is very ambiguous here. It states that 1297 * asynchronous errors should be reported by getsockopt. We assume 1298 * this means if you specify SO_ERROR (otherwise whats the point of it). 1299 */ 1300int sock_common_getsockopt(struct socket *sock, int level, int optname, 1301 char __user *optval, int __user *optlen) 1302{ 1303 struct sock *sk = sock->sk; 1304 1305 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); 1306} 1307 1308EXPORT_SYMBOL(sock_common_getsockopt); 1309 1310int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, 1311 struct msghdr *msg, size_t size, int flags) 1312{ 1313 struct sock *sk = sock->sk; 1314 int addr_len = 0; 1315 int err; 1316 1317 err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, 1318 flags & ~MSG_DONTWAIT, &addr_len); 1319 if (err >= 0) 1320 msg->msg_namelen = addr_len; 1321 return err; 1322} 1323 1324EXPORT_SYMBOL(sock_common_recvmsg); 1325 1326/* 1327 * Set socket options on an inet socket. 1328 */ 1329int sock_common_setsockopt(struct socket *sock, int level, int optname, 1330 char __user *optval, int optlen) 1331{ 1332 struct sock *sk = sock->sk; 1333 1334 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); 1335} 1336 1337EXPORT_SYMBOL(sock_common_setsockopt); 1338 1339void sk_common_release(struct sock *sk) 1340{ 1341 if (sk->sk_prot->destroy) 1342 sk->sk_prot->destroy(sk); 1343 1344 /* 1345 * Observation: when sock_common_release is called, processes have 1346 * no access to socket. But net still has. 1347 * Step one, detach it from networking: 1348 * 1349 * A. Remove from hash tables. 1350 */ 1351 1352 sk->sk_prot->unhash(sk); 1353 1354 /* 1355 * In this point socket cannot receive new packets, but it is possible 1356 * that some packets are in flight because some CPU runs receiver and 1357 * did hash table lookup before we unhashed socket. They will achieve 1358 * receive queue and will be purged by socket destructor. 1359 * 1360 * Also we still have packets pending on receive queue and probably, 1361 * our own packets waiting in device queues. sock_destroy will drain 1362 * receive queue, but transmitted packets will delay socket destruction 1363 * until the last reference will be released. 1364 */ 1365 1366 sock_orphan(sk); 1367 1368 xfrm_sk_free_policy(sk); 1369 1370 sk_refcnt_debug_release(sk); 1371 sock_put(sk); 1372} 1373 1374EXPORT_SYMBOL(sk_common_release); 1375 1376static DEFINE_RWLOCK(proto_list_lock); 1377static LIST_HEAD(proto_list); 1378 1379int proto_register(struct proto *prot, int alloc_slab) 1380{ 1381 char *request_sock_slab_name = NULL; 1382 char *timewait_sock_slab_name; 1383 int rc = -ENOBUFS; 1384 1385 if (alloc_slab) { 1386 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, 1387 SLAB_HWCACHE_ALIGN, NULL, NULL); 1388 1389 if (prot->slab == NULL) { 1390 printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", 1391 prot->name); 1392 goto out; 1393 } 1394 1395 if (prot->rsk_prot != NULL) { 1396 static const char mask[] = "request_sock_%s"; 1397 1398 request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); 1399 if (request_sock_slab_name == NULL) 1400 goto out_free_sock_slab; 1401 1402 sprintf(request_sock_slab_name, mask, prot->name); 1403 prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name, 1404 prot->rsk_prot->obj_size, 0, 1405 SLAB_HWCACHE_ALIGN, NULL, NULL); 1406 1407 if (prot->rsk_prot->slab == NULL) { 1408 printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n", 1409 prot->name); 1410 goto out_free_request_sock_slab_name; 1411 } 1412 } 1413 1414 if (prot->twsk_obj_size) { 1415 static const char mask[] = "tw_sock_%s"; 1416 1417 timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); 1418 1419 if (timewait_sock_slab_name == NULL) 1420 goto out_free_request_sock_slab; 1421 1422 sprintf(timewait_sock_slab_name, mask, prot->name); 1423 prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name, 1424 prot->twsk_obj_size, 1425 0, SLAB_HWCACHE_ALIGN, 1426 NULL, NULL); 1427 if (prot->twsk_slab == NULL) 1428 goto out_free_timewait_sock_slab_name; 1429 } 1430 } 1431 1432 write_lock(&proto_list_lock); 1433 list_add(&prot->node, &proto_list); 1434 write_unlock(&proto_list_lock); 1435 rc = 0; 1436out: 1437 return rc; 1438out_free_timewait_sock_slab_name: 1439 kfree(timewait_sock_slab_name); 1440out_free_request_sock_slab: 1441 if (prot->rsk_prot && prot->rsk_prot->slab) { 1442 kmem_cache_destroy(prot->rsk_prot->slab); 1443 prot->rsk_prot->slab = NULL; 1444 } 1445out_free_request_sock_slab_name: 1446 kfree(request_sock_slab_name); 1447out_free_sock_slab: 1448 kmem_cache_destroy(prot->slab); 1449 prot->slab = NULL; 1450 goto out; 1451} 1452 1453EXPORT_SYMBOL(proto_register); 1454 1455void proto_unregister(struct proto *prot) 1456{ 1457 write_lock(&proto_list_lock); 1458 1459 if (prot->slab != NULL) { 1460 kmem_cache_destroy(prot->slab); 1461 prot->slab = NULL; 1462 } 1463 1464 if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) { 1465 const char *name = kmem_cache_name(prot->rsk_prot->slab); 1466 1467 kmem_cache_destroy(prot->rsk_prot->slab); 1468 kfree(name); 1469 prot->rsk_prot->slab = NULL; 1470 } 1471 1472 if (prot->twsk_slab != NULL) { 1473 const char *name = kmem_cache_name(prot->twsk_slab); 1474 1475 kmem_cache_destroy(prot->twsk_slab); 1476 kfree(name); 1477 prot->twsk_slab = NULL; 1478 } 1479 1480 list_del(&prot->node); 1481 write_unlock(&proto_list_lock); 1482} 1483 1484EXPORT_SYMBOL(proto_unregister); 1485 1486#ifdef CONFIG_PROC_FS 1487static inline struct proto *__proto_head(void) 1488{ 1489 return list_entry(proto_list.next, struct proto, node); 1490} 1491 1492static inline struct proto *proto_head(void) 1493{ 1494 return list_empty(&proto_list) ? NULL : __proto_head(); 1495} 1496 1497static inline struct proto *proto_next(struct proto *proto) 1498{ 1499 return proto->node.next == &proto_list ? NULL : 1500 list_entry(proto->node.next, struct proto, node); 1501} 1502 1503static inline struct proto *proto_get_idx(loff_t pos) 1504{ 1505 struct proto *proto; 1506 loff_t i = 0; 1507 1508 list_for_each_entry(proto, &proto_list, node) 1509 if (i++ == pos) 1510 goto out; 1511 1512 proto = NULL; 1513out: 1514 return proto; 1515} 1516 1517static void *proto_seq_start(struct seq_file *seq, loff_t *pos) 1518{ 1519 read_lock(&proto_list_lock); 1520 return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN; 1521} 1522 1523static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1524{ 1525 ++*pos; 1526 return v == SEQ_START_TOKEN ? proto_head() : proto_next(v); 1527} 1528 1529static void proto_seq_stop(struct seq_file *seq, void *v) 1530{ 1531 read_unlock(&proto_list_lock); 1532} 1533 1534static char proto_method_implemented(const void *method) 1535{ 1536 return method == NULL ? 'n' : 'y'; 1537} 1538 1539static void proto_seq_printf(struct seq_file *seq, struct proto *proto) 1540{ 1541 seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s " 1542 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", 1543 proto->name, 1544 proto->obj_size, 1545 proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1, 1546 proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1, 1547 proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI", 1548 proto->max_header, 1549 proto->slab == NULL ? "no" : "yes", 1550 module_name(proto->owner), 1551 proto_method_implemented(proto->close), 1552 proto_method_implemented(proto->connect), 1553 proto_method_implemented(proto->disconnect), 1554 proto_method_implemented(proto->accept), 1555 proto_method_implemented(proto->ioctl), 1556 proto_method_implemented(proto->init), 1557 proto_method_implemented(proto->destroy), 1558 proto_method_implemented(proto->shutdown), 1559 proto_method_implemented(proto->setsockopt), 1560 proto_method_implemented(proto->getsockopt), 1561 proto_method_implemented(proto->sendmsg), 1562 proto_method_implemented(proto->recvmsg), 1563 proto_method_implemented(proto->sendpage), 1564 proto_method_implemented(proto->bind), 1565 proto_method_implemented(proto->backlog_rcv), 1566 proto_method_implemented(proto->hash), 1567 proto_method_implemented(proto->unhash), 1568 proto_method_implemented(proto->get_port), 1569 proto_method_implemented(proto->enter_memory_pressure)); 1570} 1571 1572static int proto_seq_show(struct seq_file *seq, void *v) 1573{ 1574 if (v == SEQ_START_TOKEN) 1575 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s", 1576 "protocol", 1577 "size", 1578 "sockets", 1579 "memory", 1580 "press", 1581 "maxhdr", 1582 "slab", 1583 "module", 1584 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"); 1585 else 1586 proto_seq_printf(seq, v); 1587 return 0; 1588} 1589 1590static struct seq_operations proto_seq_ops = { 1591 .start = proto_seq_start, 1592 .next = proto_seq_next, 1593 .stop = proto_seq_stop, 1594 .show = proto_seq_show, 1595}; 1596 1597static int proto_seq_open(struct inode *inode, struct file *file) 1598{ 1599 return seq_open(file, &proto_seq_ops); 1600} 1601 1602static struct file_operations proto_seq_fops = { 1603 .owner = THIS_MODULE, 1604 .open = proto_seq_open, 1605 .read = seq_read, 1606 .llseek = seq_lseek, 1607 .release = seq_release, 1608}; 1609 1610static int __init proto_init(void) 1611{ 1612 /* register /proc/net/protocols */ 1613 return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0; 1614} 1615 1616subsys_initcall(proto_init); 1617 1618#endif /* PROC_FS */ 1619 1620EXPORT_SYMBOL(sk_alloc); 1621EXPORT_SYMBOL(sk_free); 1622EXPORT_SYMBOL(sk_send_sigurg); 1623EXPORT_SYMBOL(sock_alloc_send_skb); 1624EXPORT_SYMBOL(sock_init_data); 1625EXPORT_SYMBOL(sock_kfree_s); 1626EXPORT_SYMBOL(sock_kmalloc); 1627EXPORT_SYMBOL(sock_no_accept); 1628EXPORT_SYMBOL(sock_no_bind); 1629EXPORT_SYMBOL(sock_no_connect); 1630EXPORT_SYMBOL(sock_no_getname); 1631EXPORT_SYMBOL(sock_no_getsockopt); 1632EXPORT_SYMBOL(sock_no_ioctl); 1633EXPORT_SYMBOL(sock_no_listen); 1634EXPORT_SYMBOL(sock_no_mmap); 1635EXPORT_SYMBOL(sock_no_poll); 1636EXPORT_SYMBOL(sock_no_recvmsg); 1637EXPORT_SYMBOL(sock_no_sendmsg); 1638EXPORT_SYMBOL(sock_no_sendpage); 1639EXPORT_SYMBOL(sock_no_setsockopt); 1640EXPORT_SYMBOL(sock_no_shutdown); 1641EXPORT_SYMBOL(sock_no_socketpair); 1642EXPORT_SYMBOL(sock_rfree); 1643EXPORT_SYMBOL(sock_setsockopt); 1644EXPORT_SYMBOL(sock_wfree); 1645EXPORT_SYMBOL(sock_wmalloc); 1646EXPORT_SYMBOL(sock_i_uid); 1647EXPORT_SYMBOL(sock_i_ino); 1648#ifdef CONFIG_SYSCTL 1649EXPORT_SYMBOL(sysctl_optmem_max); 1650EXPORT_SYMBOL(sysctl_rmem_max); 1651EXPORT_SYMBOL(sysctl_wmem_max); 1652#endif 1653