sock.c revision 7cb0240492caea2f6467f827313478f41877e6ef
1/* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * Generic socket support routines. Memory allocators, socket lock/release 7 * handler for protocols to use and generic option handler. 8 * 9 * 10 * Authors: Ross Biro 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Florian La Roche, <flla@stud.uni-sb.de> 13 * Alan Cox, <A.Cox@swansea.ac.uk> 14 * 15 * Fixes: 16 * Alan Cox : Numerous verify_area() problems 17 * Alan Cox : Connecting on a connecting socket 18 * now returns an error for tcp. 19 * Alan Cox : sock->protocol is set correctly. 20 * and is not sometimes left as 0. 21 * Alan Cox : connect handles icmp errors on a 22 * connect properly. Unfortunately there 23 * is a restart syscall nasty there. I 24 * can't match BSD without hacking the C 25 * library. Ideas urgently sought! 26 * Alan Cox : Disallow bind() to addresses that are 27 * not ours - especially broadcast ones!! 28 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost) 29 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets, 30 * instead they leave that for the DESTROY timer. 31 * Alan Cox : Clean up error flag in accept 32 * Alan Cox : TCP ack handling is buggy, the DESTROY timer 33 * was buggy. Put a remove_sock() in the handler 34 * for memory when we hit 0. Also altered the timer 35 * code. The ACK stuff can wait and needs major 36 * TCP layer surgery. 37 * Alan Cox : Fixed TCP ack bug, removed remove sock 38 * and fixed timer/inet_bh race. 39 * Alan Cox : Added zapped flag for TCP 40 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code 41 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb 42 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources 43 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing. 44 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so... 45 * Rick Sladkey : Relaxed UDP rules for matching packets. 46 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support 47 * Pauline Middelink : identd support 48 * Alan Cox : Fixed connect() taking signals I think. 49 * Alan Cox : SO_LINGER supported 50 * Alan Cox : Error reporting fixes 51 * Anonymous : inet_create tidied up (sk->reuse setting) 52 * Alan Cox : inet sockets don't set sk->type! 53 * Alan Cox : Split socket option code 54 * Alan Cox : Callbacks 55 * Alan Cox : Nagle flag for Charles & Johannes stuff 56 * Alex : Removed restriction on inet fioctl 57 * Alan Cox : Splitting INET from NET core 58 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt() 59 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code 60 * Alan Cox : Split IP from generic code 61 * Alan Cox : New kfree_skbmem() 62 * Alan Cox : Make SO_DEBUG superuser only. 63 * Alan Cox : Allow anyone to clear SO_DEBUG 64 * (compatibility fix) 65 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput. 66 * Alan Cox : Allocator for a socket is settable. 67 * Alan Cox : SO_ERROR includes soft errors. 68 * Alan Cox : Allow NULL arguments on some SO_ opts 69 * Alan Cox : Generic socket allocation to make hooks 70 * easier (suggested by Craig Metz). 71 * Michael Pall : SO_ERROR returns positive errno again 72 * Steve Whitehouse: Added default destructor to free 73 * protocol private data. 74 * Steve Whitehouse: Added various other default routines 75 * common to several socket families. 76 * Chris Evans : Call suser() check last on F_SETOWN 77 * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER. 78 * Andi Kleen : Add sock_kmalloc()/sock_kfree_s() 79 * Andi Kleen : Fix write_space callback 80 * Chris Evans : Security fixes - signedness again 81 * Arnaldo C. Melo : cleanups, use skb_queue_purge 82 * 83 * To Fix: 84 * 85 * 86 * This program is free software; you can redistribute it and/or 87 * modify it under the terms of the GNU General Public License 88 * as published by the Free Software Foundation; either version 89 * 2 of the License, or (at your option) any later version. 90 */ 91 92#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 93 94#include <linux/capability.h> 95#include <linux/errno.h> 96#include <linux/types.h> 97#include <linux/socket.h> 98#include <linux/in.h> 99#include <linux/kernel.h> 100#include <linux/module.h> 101#include <linux/proc_fs.h> 102#include <linux/seq_file.h> 103#include <linux/sched.h> 104#include <linux/timer.h> 105#include <linux/string.h> 106#include <linux/sockios.h> 107#include <linux/net.h> 108#include <linux/mm.h> 109#include <linux/slab.h> 110#include <linux/interrupt.h> 111#include <linux/poll.h> 112#include <linux/tcp.h> 113#include <linux/init.h> 114#include <linux/highmem.h> 115#include <linux/user_namespace.h> 116#include <linux/static_key.h> 117#include <linux/memcontrol.h> 118#include <linux/prefetch.h> 119 120#include <asm/uaccess.h> 121 122#include <linux/netdevice.h> 123#include <net/protocol.h> 124#include <linux/skbuff.h> 125#include <net/net_namespace.h> 126#include <net/request_sock.h> 127#include <net/sock.h> 128#include <linux/net_tstamp.h> 129#include <net/xfrm.h> 130#include <linux/ipsec.h> 131#include <net/cls_cgroup.h> 132#include <net/netprio_cgroup.h> 133 134#include <linux/filter.h> 135 136#include <trace/events/sock.h> 137 138#ifdef CONFIG_INET 139#include <net/tcp.h> 140#endif 141 142static DEFINE_MUTEX(proto_list_mutex); 143static LIST_HEAD(proto_list); 144 145#ifdef CONFIG_MEMCG_KMEM 146int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) 147{ 148 struct proto *proto; 149 int ret = 0; 150 151 mutex_lock(&proto_list_mutex); 152 list_for_each_entry(proto, &proto_list, node) { 153 if (proto->init_cgroup) { 154 ret = proto->init_cgroup(memcg, ss); 155 if (ret) 156 goto out; 157 } 158 } 159 160 mutex_unlock(&proto_list_mutex); 161 return ret; 162out: 163 list_for_each_entry_continue_reverse(proto, &proto_list, node) 164 if (proto->destroy_cgroup) 165 proto->destroy_cgroup(memcg); 166 mutex_unlock(&proto_list_mutex); 167 return ret; 168} 169 170void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg) 171{ 172 struct proto *proto; 173 174 mutex_lock(&proto_list_mutex); 175 list_for_each_entry_reverse(proto, &proto_list, node) 176 if (proto->destroy_cgroup) 177 proto->destroy_cgroup(memcg); 178 mutex_unlock(&proto_list_mutex); 179} 180#endif 181 182/* 183 * Each address family might have different locking rules, so we have 184 * one slock key per address family: 185 */ 186static struct lock_class_key af_family_keys[AF_MAX]; 187static struct lock_class_key af_family_slock_keys[AF_MAX]; 188 189struct static_key memcg_socket_limit_enabled; 190EXPORT_SYMBOL(memcg_socket_limit_enabled); 191 192/* 193 * Make lock validator output more readable. (we pre-construct these 194 * strings build-time, so that runtime initialization of socket 195 * locks is fast): 196 */ 197static const char *const af_family_key_strings[AF_MAX+1] = { 198 "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" , 199 "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK", 200 "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" , 201 "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" , 202 "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" , 203 "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" , 204 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , 205 "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , 206 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , 207 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , 208 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , 209 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , 210 "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , 211 "sk_lock-AF_NFC" , "sk_lock-AF_MAX" 212}; 213static const char *const af_family_slock_key_strings[AF_MAX+1] = { 214 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , 215 "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK", 216 "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" , 217 "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" , 218 "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" , 219 "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" , 220 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , 221 "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" , 222 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , 223 "slock-27" , "slock-28" , "slock-AF_CAN" , 224 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , 225 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , 226 "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , 227 "slock-AF_NFC" , "slock-AF_MAX" 228}; 229static const char *const af_family_clock_key_strings[AF_MAX+1] = { 230 "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , 231 "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK", 232 "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" , 233 "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" , 234 "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" , 235 "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" , 236 "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , 237 "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" , 238 "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , 239 "clock-27" , "clock-28" , "clock-AF_CAN" , 240 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , 241 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , 242 "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , 243 "clock-AF_NFC" , "clock-AF_MAX" 244}; 245 246/* 247 * sk_callback_lock locking rules are per-address-family, 248 * so split the lock classes by using a per-AF key: 249 */ 250static struct lock_class_key af_callback_keys[AF_MAX]; 251 252/* Take into consideration the size of the struct sk_buff overhead in the 253 * determination of these values, since that is non-constant across 254 * platforms. This makes socket queueing behavior and performance 255 * not depend upon such differences. 256 */ 257#define _SK_MEM_PACKETS 256 258#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) 259#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 260#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 261 262/* Run time adjustable parameters. */ 263__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; 264EXPORT_SYMBOL(sysctl_wmem_max); 265__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; 266EXPORT_SYMBOL(sysctl_rmem_max); 267__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; 268__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; 269 270/* Maximal space eaten by iovec or ancillary data plus some space */ 271int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); 272EXPORT_SYMBOL(sysctl_optmem_max); 273 274/** 275 * sk_set_memalloc - sets %SOCK_MEMALLOC 276 * @sk: socket to set it on 277 * 278 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves. 279 * It's the responsibility of the admin to adjust min_free_kbytes 280 * to meet the requirements 281 */ 282void sk_set_memalloc(struct sock *sk) 283{ 284 sock_set_flag(sk, SOCK_MEMALLOC); 285 sk->sk_allocation |= __GFP_MEMALLOC; 286} 287EXPORT_SYMBOL_GPL(sk_set_memalloc); 288 289void sk_clear_memalloc(struct sock *sk) 290{ 291 sock_reset_flag(sk, SOCK_MEMALLOC); 292 sk->sk_allocation &= ~__GFP_MEMALLOC; 293} 294EXPORT_SYMBOL_GPL(sk_clear_memalloc); 295 296#if defined(CONFIG_CGROUPS) 297#if !defined(CONFIG_NET_CLS_CGROUP) 298int net_cls_subsys_id = -1; 299EXPORT_SYMBOL_GPL(net_cls_subsys_id); 300#endif 301#if !defined(CONFIG_NETPRIO_CGROUP) 302int net_prio_subsys_id = -1; 303EXPORT_SYMBOL_GPL(net_prio_subsys_id); 304#endif 305#endif 306 307static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) 308{ 309 struct timeval tv; 310 311 if (optlen < sizeof(tv)) 312 return -EINVAL; 313 if (copy_from_user(&tv, optval, sizeof(tv))) 314 return -EFAULT; 315 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC) 316 return -EDOM; 317 318 if (tv.tv_sec < 0) { 319 static int warned __read_mostly; 320 321 *timeo_p = 0; 322 if (warned < 10 && net_ratelimit()) { 323 warned++; 324 pr_info("%s: `%s' (pid %d) tries to set negative timeout\n", 325 __func__, current->comm, task_pid_nr(current)); 326 } 327 return 0; 328 } 329 *timeo_p = MAX_SCHEDULE_TIMEOUT; 330 if (tv.tv_sec == 0 && tv.tv_usec == 0) 331 return 0; 332 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1)) 333 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ); 334 return 0; 335} 336 337static void sock_warn_obsolete_bsdism(const char *name) 338{ 339 static int warned; 340 static char warncomm[TASK_COMM_LEN]; 341 if (strcmp(warncomm, current->comm) && warned < 5) { 342 strcpy(warncomm, current->comm); 343 pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n", 344 warncomm, name); 345 warned++; 346 } 347} 348 349#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) 350 351static void sock_disable_timestamp(struct sock *sk, unsigned long flags) 352{ 353 if (sk->sk_flags & flags) { 354 sk->sk_flags &= ~flags; 355 if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP)) 356 net_disable_timestamp(); 357 } 358} 359 360 361int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 362{ 363 int err; 364 int skb_len; 365 unsigned long flags; 366 struct sk_buff_head *list = &sk->sk_receive_queue; 367 368 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) { 369 atomic_inc(&sk->sk_drops); 370 trace_sock_rcvqueue_full(sk, skb); 371 return -ENOMEM; 372 } 373 374 err = sk_filter(sk, skb); 375 if (err) 376 return err; 377 378 if (!sk_rmem_schedule(sk, skb->truesize)) { 379 atomic_inc(&sk->sk_drops); 380 return -ENOBUFS; 381 } 382 383 skb->dev = NULL; 384 skb_set_owner_r(skb, sk); 385 386 /* Cache the SKB length before we tack it onto the receive 387 * queue. Once it is added it no longer belongs to us and 388 * may be freed by other threads of control pulling packets 389 * from the queue. 390 */ 391 skb_len = skb->len; 392 393 /* we escape from rcu protected region, make sure we dont leak 394 * a norefcounted dst 395 */ 396 skb_dst_force(skb); 397 398 spin_lock_irqsave(&list->lock, flags); 399 skb->dropcount = atomic_read(&sk->sk_drops); 400 __skb_queue_tail(list, skb); 401 spin_unlock_irqrestore(&list->lock, flags); 402 403 if (!sock_flag(sk, SOCK_DEAD)) 404 sk->sk_data_ready(sk, skb_len); 405 return 0; 406} 407EXPORT_SYMBOL(sock_queue_rcv_skb); 408 409int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) 410{ 411 int rc = NET_RX_SUCCESS; 412 413 if (sk_filter(sk, skb)) 414 goto discard_and_relse; 415 416 skb->dev = NULL; 417 418 if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) { 419 atomic_inc(&sk->sk_drops); 420 goto discard_and_relse; 421 } 422 if (nested) 423 bh_lock_sock_nested(sk); 424 else 425 bh_lock_sock(sk); 426 if (!sock_owned_by_user(sk)) { 427 /* 428 * trylock + unlock semantics: 429 */ 430 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_); 431 432 rc = sk_backlog_rcv(sk, skb); 433 434 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); 435 } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { 436 bh_unlock_sock(sk); 437 atomic_inc(&sk->sk_drops); 438 goto discard_and_relse; 439 } 440 441 bh_unlock_sock(sk); 442out: 443 sock_put(sk); 444 return rc; 445discard_and_relse: 446 kfree_skb(skb); 447 goto out; 448} 449EXPORT_SYMBOL(sk_receive_skb); 450 451void sk_reset_txq(struct sock *sk) 452{ 453 sk_tx_queue_clear(sk); 454} 455EXPORT_SYMBOL(sk_reset_txq); 456 457struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) 458{ 459 struct dst_entry *dst = __sk_dst_get(sk); 460 461 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { 462 sk_tx_queue_clear(sk); 463 RCU_INIT_POINTER(sk->sk_dst_cache, NULL); 464 dst_release(dst); 465 return NULL; 466 } 467 468 return dst; 469} 470EXPORT_SYMBOL(__sk_dst_check); 471 472struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) 473{ 474 struct dst_entry *dst = sk_dst_get(sk); 475 476 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { 477 sk_dst_reset(sk); 478 dst_release(dst); 479 return NULL; 480 } 481 482 return dst; 483} 484EXPORT_SYMBOL(sk_dst_check); 485 486static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) 487{ 488 int ret = -ENOPROTOOPT; 489#ifdef CONFIG_NETDEVICES 490 struct net *net = sock_net(sk); 491 char devname[IFNAMSIZ]; 492 int index; 493 494 /* Sorry... */ 495 ret = -EPERM; 496 if (!capable(CAP_NET_RAW)) 497 goto out; 498 499 ret = -EINVAL; 500 if (optlen < 0) 501 goto out; 502 503 /* Bind this socket to a particular device like "eth0", 504 * as specified in the passed interface name. If the 505 * name is "" or the option length is zero the socket 506 * is not bound. 507 */ 508 if (optlen > IFNAMSIZ - 1) 509 optlen = IFNAMSIZ - 1; 510 memset(devname, 0, sizeof(devname)); 511 512 ret = -EFAULT; 513 if (copy_from_user(devname, optval, optlen)) 514 goto out; 515 516 index = 0; 517 if (devname[0] != '\0') { 518 struct net_device *dev; 519 520 rcu_read_lock(); 521 dev = dev_get_by_name_rcu(net, devname); 522 if (dev) 523 index = dev->ifindex; 524 rcu_read_unlock(); 525 ret = -ENODEV; 526 if (!dev) 527 goto out; 528 } 529 530 lock_sock(sk); 531 sk->sk_bound_dev_if = index; 532 sk_dst_reset(sk); 533 release_sock(sk); 534 535 ret = 0; 536 537out: 538#endif 539 540 return ret; 541} 542 543static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) 544{ 545 if (valbool) 546 sock_set_flag(sk, bit); 547 else 548 sock_reset_flag(sk, bit); 549} 550 551/* 552 * This is meant for all protocols to use and covers goings on 553 * at the socket level. Everything here is generic. 554 */ 555 556int sock_setsockopt(struct socket *sock, int level, int optname, 557 char __user *optval, unsigned int optlen) 558{ 559 struct sock *sk = sock->sk; 560 int val; 561 int valbool; 562 struct linger ling; 563 int ret = 0; 564 565 /* 566 * Options without arguments 567 */ 568 569 if (optname == SO_BINDTODEVICE) 570 return sock_bindtodevice(sk, optval, optlen); 571 572 if (optlen < sizeof(int)) 573 return -EINVAL; 574 575 if (get_user(val, (int __user *)optval)) 576 return -EFAULT; 577 578 valbool = val ? 1 : 0; 579 580 lock_sock(sk); 581 582 switch (optname) { 583 case SO_DEBUG: 584 if (val && !capable(CAP_NET_ADMIN)) 585 ret = -EACCES; 586 else 587 sock_valbool_flag(sk, SOCK_DBG, valbool); 588 break; 589 case SO_REUSEADDR: 590 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE); 591 break; 592 case SO_TYPE: 593 case SO_PROTOCOL: 594 case SO_DOMAIN: 595 case SO_ERROR: 596 ret = -ENOPROTOOPT; 597 break; 598 case SO_DONTROUTE: 599 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool); 600 break; 601 case SO_BROADCAST: 602 sock_valbool_flag(sk, SOCK_BROADCAST, valbool); 603 break; 604 case SO_SNDBUF: 605 /* Don't error on this BSD doesn't and if you think 606 * about it this is right. Otherwise apps have to 607 * play 'guess the biggest size' games. RCVBUF/SNDBUF 608 * are treated in BSD as hints 609 */ 610 val = min_t(u32, val, sysctl_wmem_max); 611set_sndbuf: 612 sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 613 sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF); 614 /* Wake up sending tasks if we upped the value. */ 615 sk->sk_write_space(sk); 616 break; 617 618 case SO_SNDBUFFORCE: 619 if (!capable(CAP_NET_ADMIN)) { 620 ret = -EPERM; 621 break; 622 } 623 goto set_sndbuf; 624 625 case SO_RCVBUF: 626 /* Don't error on this BSD doesn't and if you think 627 * about it this is right. Otherwise apps have to 628 * play 'guess the biggest size' games. RCVBUF/SNDBUF 629 * are treated in BSD as hints 630 */ 631 val = min_t(u32, val, sysctl_rmem_max); 632set_rcvbuf: 633 sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 634 /* 635 * We double it on the way in to account for 636 * "struct sk_buff" etc. overhead. Applications 637 * assume that the SO_RCVBUF setting they make will 638 * allow that much actual data to be received on that 639 * socket. 640 * 641 * Applications are unaware that "struct sk_buff" and 642 * other overheads allocate from the receive buffer 643 * during socket buffer allocation. 644 * 645 * And after considering the possible alternatives, 646 * returning the value we actually used in getsockopt 647 * is the most desirable behavior. 648 */ 649 sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF); 650 break; 651 652 case SO_RCVBUFFORCE: 653 if (!capable(CAP_NET_ADMIN)) { 654 ret = -EPERM; 655 break; 656 } 657 goto set_rcvbuf; 658 659 case SO_KEEPALIVE: 660#ifdef CONFIG_INET 661 if (sk->sk_protocol == IPPROTO_TCP) 662 tcp_set_keepalive(sk, valbool); 663#endif 664 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); 665 break; 666 667 case SO_OOBINLINE: 668 sock_valbool_flag(sk, SOCK_URGINLINE, valbool); 669 break; 670 671 case SO_NO_CHECK: 672 sk->sk_no_check = valbool; 673 break; 674 675 case SO_PRIORITY: 676 if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN)) 677 sk->sk_priority = val; 678 else 679 ret = -EPERM; 680 break; 681 682 case SO_LINGER: 683 if (optlen < sizeof(ling)) { 684 ret = -EINVAL; /* 1003.1g */ 685 break; 686 } 687 if (copy_from_user(&ling, optval, sizeof(ling))) { 688 ret = -EFAULT; 689 break; 690 } 691 if (!ling.l_onoff) 692 sock_reset_flag(sk, SOCK_LINGER); 693 else { 694#if (BITS_PER_LONG == 32) 695 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ) 696 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT; 697 else 698#endif 699 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ; 700 sock_set_flag(sk, SOCK_LINGER); 701 } 702 break; 703 704 case SO_BSDCOMPAT: 705 sock_warn_obsolete_bsdism("setsockopt"); 706 break; 707 708 case SO_PASSCRED: 709 if (valbool) 710 set_bit(SOCK_PASSCRED, &sock->flags); 711 else 712 clear_bit(SOCK_PASSCRED, &sock->flags); 713 break; 714 715 case SO_TIMESTAMP: 716 case SO_TIMESTAMPNS: 717 if (valbool) { 718 if (optname == SO_TIMESTAMP) 719 sock_reset_flag(sk, SOCK_RCVTSTAMPNS); 720 else 721 sock_set_flag(sk, SOCK_RCVTSTAMPNS); 722 sock_set_flag(sk, SOCK_RCVTSTAMP); 723 sock_enable_timestamp(sk, SOCK_TIMESTAMP); 724 } else { 725 sock_reset_flag(sk, SOCK_RCVTSTAMP); 726 sock_reset_flag(sk, SOCK_RCVTSTAMPNS); 727 } 728 break; 729 730 case SO_TIMESTAMPING: 731 if (val & ~SOF_TIMESTAMPING_MASK) { 732 ret = -EINVAL; 733 break; 734 } 735 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE, 736 val & SOF_TIMESTAMPING_TX_HARDWARE); 737 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE, 738 val & SOF_TIMESTAMPING_TX_SOFTWARE); 739 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE, 740 val & SOF_TIMESTAMPING_RX_HARDWARE); 741 if (val & SOF_TIMESTAMPING_RX_SOFTWARE) 742 sock_enable_timestamp(sk, 743 SOCK_TIMESTAMPING_RX_SOFTWARE); 744 else 745 sock_disable_timestamp(sk, 746 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); 747 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE, 748 val & SOF_TIMESTAMPING_SOFTWARE); 749 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE, 750 val & SOF_TIMESTAMPING_SYS_HARDWARE); 751 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE, 752 val & SOF_TIMESTAMPING_RAW_HARDWARE); 753 break; 754 755 case SO_RCVLOWAT: 756 if (val < 0) 757 val = INT_MAX; 758 sk->sk_rcvlowat = val ? : 1; 759 break; 760 761 case SO_RCVTIMEO: 762 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen); 763 break; 764 765 case SO_SNDTIMEO: 766 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen); 767 break; 768 769 case SO_ATTACH_FILTER: 770 ret = -EINVAL; 771 if (optlen == sizeof(struct sock_fprog)) { 772 struct sock_fprog fprog; 773 774 ret = -EFAULT; 775 if (copy_from_user(&fprog, optval, sizeof(fprog))) 776 break; 777 778 ret = sk_attach_filter(&fprog, sk); 779 } 780 break; 781 782 case SO_DETACH_FILTER: 783 ret = sk_detach_filter(sk); 784 break; 785 786 case SO_PASSSEC: 787 if (valbool) 788 set_bit(SOCK_PASSSEC, &sock->flags); 789 else 790 clear_bit(SOCK_PASSSEC, &sock->flags); 791 break; 792 case SO_MARK: 793 if (!capable(CAP_NET_ADMIN)) 794 ret = -EPERM; 795 else 796 sk->sk_mark = val; 797 break; 798 799 /* We implement the SO_SNDLOWAT etc to 800 not be settable (1003.1g 5.3) */ 801 case SO_RXQ_OVFL: 802 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); 803 break; 804 805 case SO_WIFI_STATUS: 806 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); 807 break; 808 809 case SO_PEEK_OFF: 810 if (sock->ops->set_peek_off) 811 sock->ops->set_peek_off(sk, val); 812 else 813 ret = -EOPNOTSUPP; 814 break; 815 816 case SO_NOFCS: 817 sock_valbool_flag(sk, SOCK_NOFCS, valbool); 818 break; 819 820 default: 821 ret = -ENOPROTOOPT; 822 break; 823 } 824 release_sock(sk); 825 return ret; 826} 827EXPORT_SYMBOL(sock_setsockopt); 828 829 830void cred_to_ucred(struct pid *pid, const struct cred *cred, 831 struct ucred *ucred) 832{ 833 ucred->pid = pid_vnr(pid); 834 ucred->uid = ucred->gid = -1; 835 if (cred) { 836 struct user_namespace *current_ns = current_user_ns(); 837 838 ucred->uid = from_kuid(current_ns, cred->euid); 839 ucred->gid = from_kgid(current_ns, cred->egid); 840 } 841} 842EXPORT_SYMBOL_GPL(cred_to_ucred); 843 844int sock_getsockopt(struct socket *sock, int level, int optname, 845 char __user *optval, int __user *optlen) 846{ 847 struct sock *sk = sock->sk; 848 849 union { 850 int val; 851 struct linger ling; 852 struct timeval tm; 853 } v; 854 855 int lv = sizeof(int); 856 int len; 857 858 if (get_user(len, optlen)) 859 return -EFAULT; 860 if (len < 0) 861 return -EINVAL; 862 863 memset(&v, 0, sizeof(v)); 864 865 switch (optname) { 866 case SO_DEBUG: 867 v.val = sock_flag(sk, SOCK_DBG); 868 break; 869 870 case SO_DONTROUTE: 871 v.val = sock_flag(sk, SOCK_LOCALROUTE); 872 break; 873 874 case SO_BROADCAST: 875 v.val = sock_flag(sk, SOCK_BROADCAST); 876 break; 877 878 case SO_SNDBUF: 879 v.val = sk->sk_sndbuf; 880 break; 881 882 case SO_RCVBUF: 883 v.val = sk->sk_rcvbuf; 884 break; 885 886 case SO_REUSEADDR: 887 v.val = sk->sk_reuse; 888 break; 889 890 case SO_KEEPALIVE: 891 v.val = sock_flag(sk, SOCK_KEEPOPEN); 892 break; 893 894 case SO_TYPE: 895 v.val = sk->sk_type; 896 break; 897 898 case SO_PROTOCOL: 899 v.val = sk->sk_protocol; 900 break; 901 902 case SO_DOMAIN: 903 v.val = sk->sk_family; 904 break; 905 906 case SO_ERROR: 907 v.val = -sock_error(sk); 908 if (v.val == 0) 909 v.val = xchg(&sk->sk_err_soft, 0); 910 break; 911 912 case SO_OOBINLINE: 913 v.val = sock_flag(sk, SOCK_URGINLINE); 914 break; 915 916 case SO_NO_CHECK: 917 v.val = sk->sk_no_check; 918 break; 919 920 case SO_PRIORITY: 921 v.val = sk->sk_priority; 922 break; 923 924 case SO_LINGER: 925 lv = sizeof(v.ling); 926 v.ling.l_onoff = sock_flag(sk, SOCK_LINGER); 927 v.ling.l_linger = sk->sk_lingertime / HZ; 928 break; 929 930 case SO_BSDCOMPAT: 931 sock_warn_obsolete_bsdism("getsockopt"); 932 break; 933 934 case SO_TIMESTAMP: 935 v.val = sock_flag(sk, SOCK_RCVTSTAMP) && 936 !sock_flag(sk, SOCK_RCVTSTAMPNS); 937 break; 938 939 case SO_TIMESTAMPNS: 940 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS); 941 break; 942 943 case SO_TIMESTAMPING: 944 v.val = 0; 945 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) 946 v.val |= SOF_TIMESTAMPING_TX_HARDWARE; 947 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) 948 v.val |= SOF_TIMESTAMPING_TX_SOFTWARE; 949 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE)) 950 v.val |= SOF_TIMESTAMPING_RX_HARDWARE; 951 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) 952 v.val |= SOF_TIMESTAMPING_RX_SOFTWARE; 953 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) 954 v.val |= SOF_TIMESTAMPING_SOFTWARE; 955 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE)) 956 v.val |= SOF_TIMESTAMPING_SYS_HARDWARE; 957 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)) 958 v.val |= SOF_TIMESTAMPING_RAW_HARDWARE; 959 break; 960 961 case SO_RCVTIMEO: 962 lv = sizeof(struct timeval); 963 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) { 964 v.tm.tv_sec = 0; 965 v.tm.tv_usec = 0; 966 } else { 967 v.tm.tv_sec = sk->sk_rcvtimeo / HZ; 968 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ; 969 } 970 break; 971 972 case SO_SNDTIMEO: 973 lv = sizeof(struct timeval); 974 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) { 975 v.tm.tv_sec = 0; 976 v.tm.tv_usec = 0; 977 } else { 978 v.tm.tv_sec = sk->sk_sndtimeo / HZ; 979 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ; 980 } 981 break; 982 983 case SO_RCVLOWAT: 984 v.val = sk->sk_rcvlowat; 985 break; 986 987 case SO_SNDLOWAT: 988 v.val = 1; 989 break; 990 991 case SO_PASSCRED: 992 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags); 993 break; 994 995 case SO_PEERCRED: 996 { 997 struct ucred peercred; 998 if (len > sizeof(peercred)) 999 len = sizeof(peercred); 1000 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred); 1001 if (copy_to_user(optval, &peercred, len)) 1002 return -EFAULT; 1003 goto lenout; 1004 } 1005 1006 case SO_PEERNAME: 1007 { 1008 char address[128]; 1009 1010 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2)) 1011 return -ENOTCONN; 1012 if (lv < len) 1013 return -EINVAL; 1014 if (copy_to_user(optval, address, len)) 1015 return -EFAULT; 1016 goto lenout; 1017 } 1018 1019 /* Dubious BSD thing... Probably nobody even uses it, but 1020 * the UNIX standard wants it for whatever reason... -DaveM 1021 */ 1022 case SO_ACCEPTCONN: 1023 v.val = sk->sk_state == TCP_LISTEN; 1024 break; 1025 1026 case SO_PASSSEC: 1027 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags); 1028 break; 1029 1030 case SO_PEERSEC: 1031 return security_socket_getpeersec_stream(sock, optval, optlen, len); 1032 1033 case SO_MARK: 1034 v.val = sk->sk_mark; 1035 break; 1036 1037 case SO_RXQ_OVFL: 1038 v.val = sock_flag(sk, SOCK_RXQ_OVFL); 1039 break; 1040 1041 case SO_WIFI_STATUS: 1042 v.val = sock_flag(sk, SOCK_WIFI_STATUS); 1043 break; 1044 1045 case SO_PEEK_OFF: 1046 if (!sock->ops->set_peek_off) 1047 return -EOPNOTSUPP; 1048 1049 v.val = sk->sk_peek_off; 1050 break; 1051 case SO_NOFCS: 1052 v.val = sock_flag(sk, SOCK_NOFCS); 1053 break; 1054 default: 1055 return -ENOPROTOOPT; 1056 } 1057 1058 if (len > lv) 1059 len = lv; 1060 if (copy_to_user(optval, &v, len)) 1061 return -EFAULT; 1062lenout: 1063 if (put_user(len, optlen)) 1064 return -EFAULT; 1065 return 0; 1066} 1067 1068/* 1069 * Initialize an sk_lock. 1070 * 1071 * (We also register the sk_lock with the lock validator.) 1072 */ 1073static inline void sock_lock_init(struct sock *sk) 1074{ 1075 sock_lock_init_class_and_name(sk, 1076 af_family_slock_key_strings[sk->sk_family], 1077 af_family_slock_keys + sk->sk_family, 1078 af_family_key_strings[sk->sk_family], 1079 af_family_keys + sk->sk_family); 1080} 1081 1082/* 1083 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, 1084 * even temporarly, because of RCU lookups. sk_node should also be left as is. 1085 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end 1086 */ 1087static void sock_copy(struct sock *nsk, const struct sock *osk) 1088{ 1089#ifdef CONFIG_SECURITY_NETWORK 1090 void *sptr = nsk->sk_security; 1091#endif 1092 memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); 1093 1094 memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, 1095 osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); 1096 1097#ifdef CONFIG_SECURITY_NETWORK 1098 nsk->sk_security = sptr; 1099 security_sk_clone(osk, nsk); 1100#endif 1101} 1102 1103/* 1104 * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes 1105 * un-modified. Special care is taken when initializing object to zero. 1106 */ 1107static inline void sk_prot_clear_nulls(struct sock *sk, int size) 1108{ 1109 if (offsetof(struct sock, sk_node.next) != 0) 1110 memset(sk, 0, offsetof(struct sock, sk_node.next)); 1111 memset(&sk->sk_node.pprev, 0, 1112 size - offsetof(struct sock, sk_node.pprev)); 1113} 1114 1115void sk_prot_clear_portaddr_nulls(struct sock *sk, int size) 1116{ 1117 unsigned long nulls1, nulls2; 1118 1119 nulls1 = offsetof(struct sock, __sk_common.skc_node.next); 1120 nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next); 1121 if (nulls1 > nulls2) 1122 swap(nulls1, nulls2); 1123 1124 if (nulls1 != 0) 1125 memset((char *)sk, 0, nulls1); 1126 memset((char *)sk + nulls1 + sizeof(void *), 0, 1127 nulls2 - nulls1 - sizeof(void *)); 1128 memset((char *)sk + nulls2 + sizeof(void *), 0, 1129 size - nulls2 - sizeof(void *)); 1130} 1131EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls); 1132 1133static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, 1134 int family) 1135{ 1136 struct sock *sk; 1137 struct kmem_cache *slab; 1138 1139 slab = prot->slab; 1140 if (slab != NULL) { 1141 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO); 1142 if (!sk) 1143 return sk; 1144 if (priority & __GFP_ZERO) { 1145 if (prot->clear_sk) 1146 prot->clear_sk(sk, prot->obj_size); 1147 else 1148 sk_prot_clear_nulls(sk, prot->obj_size); 1149 } 1150 } else 1151 sk = kmalloc(prot->obj_size, priority); 1152 1153 if (sk != NULL) { 1154 kmemcheck_annotate_bitfield(sk, flags); 1155 1156 if (security_sk_alloc(sk, family, priority)) 1157 goto out_free; 1158 1159 if (!try_module_get(prot->owner)) 1160 goto out_free_sec; 1161 sk_tx_queue_clear(sk); 1162 } 1163 1164 return sk; 1165 1166out_free_sec: 1167 security_sk_free(sk); 1168out_free: 1169 if (slab != NULL) 1170 kmem_cache_free(slab, sk); 1171 else 1172 kfree(sk); 1173 return NULL; 1174} 1175 1176static void sk_prot_free(struct proto *prot, struct sock *sk) 1177{ 1178 struct kmem_cache *slab; 1179 struct module *owner; 1180 1181 owner = prot->owner; 1182 slab = prot->slab; 1183 1184 security_sk_free(sk); 1185 if (slab != NULL) 1186 kmem_cache_free(slab, sk); 1187 else 1188 kfree(sk); 1189 module_put(owner); 1190} 1191 1192#ifdef CONFIG_CGROUPS 1193void sock_update_classid(struct sock *sk) 1194{ 1195 u32 classid; 1196 1197 rcu_read_lock(); /* doing current task, which cannot vanish. */ 1198 classid = task_cls_classid(current); 1199 rcu_read_unlock(); 1200 if (classid && classid != sk->sk_classid) 1201 sk->sk_classid = classid; 1202} 1203EXPORT_SYMBOL(sock_update_classid); 1204 1205void sock_update_netprioidx(struct sock *sk, struct task_struct *task) 1206{ 1207 if (in_interrupt()) 1208 return; 1209 1210 sk->sk_cgrp_prioidx = task_netprioidx(task); 1211} 1212EXPORT_SYMBOL_GPL(sock_update_netprioidx); 1213#endif 1214 1215/** 1216 * sk_alloc - All socket objects are allocated here 1217 * @net: the applicable net namespace 1218 * @family: protocol family 1219 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) 1220 * @prot: struct proto associated with this new sock instance 1221 */ 1222struct sock *sk_alloc(struct net *net, int family, gfp_t priority, 1223 struct proto *prot) 1224{ 1225 struct sock *sk; 1226 1227 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family); 1228 if (sk) { 1229 sk->sk_family = family; 1230 /* 1231 * See comment in struct sock definition to understand 1232 * why we need sk_prot_creator -acme 1233 */ 1234 sk->sk_prot = sk->sk_prot_creator = prot; 1235 sock_lock_init(sk); 1236 sock_net_set(sk, get_net(net)); 1237 atomic_set(&sk->sk_wmem_alloc, 1); 1238 1239 sock_update_classid(sk); 1240 sock_update_netprioidx(sk, current); 1241 } 1242 1243 return sk; 1244} 1245EXPORT_SYMBOL(sk_alloc); 1246 1247static void __sk_free(struct sock *sk) 1248{ 1249 struct sk_filter *filter; 1250 1251 if (sk->sk_destruct) 1252 sk->sk_destruct(sk); 1253 1254 filter = rcu_dereference_check(sk->sk_filter, 1255 atomic_read(&sk->sk_wmem_alloc) == 0); 1256 if (filter) { 1257 sk_filter_uncharge(sk, filter); 1258 RCU_INIT_POINTER(sk->sk_filter, NULL); 1259 } 1260 1261 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); 1262 1263 if (atomic_read(&sk->sk_omem_alloc)) 1264 pr_debug("%s: optmem leakage (%d bytes) detected\n", 1265 __func__, atomic_read(&sk->sk_omem_alloc)); 1266 1267 if (sk->sk_peer_cred) 1268 put_cred(sk->sk_peer_cred); 1269 put_pid(sk->sk_peer_pid); 1270 put_net(sock_net(sk)); 1271 sk_prot_free(sk->sk_prot_creator, sk); 1272} 1273 1274void sk_free(struct sock *sk) 1275{ 1276 /* 1277 * We subtract one from sk_wmem_alloc and can know if 1278 * some packets are still in some tx queue. 1279 * If not null, sock_wfree() will call __sk_free(sk) later 1280 */ 1281 if (atomic_dec_and_test(&sk->sk_wmem_alloc)) 1282 __sk_free(sk); 1283} 1284EXPORT_SYMBOL(sk_free); 1285 1286/* 1287 * Last sock_put should drop reference to sk->sk_net. It has already 1288 * been dropped in sk_change_net. Taking reference to stopping namespace 1289 * is not an option. 1290 * Take reference to a socket to remove it from hash _alive_ and after that 1291 * destroy it in the context of init_net. 1292 */ 1293void sk_release_kernel(struct sock *sk) 1294{ 1295 if (sk == NULL || sk->sk_socket == NULL) 1296 return; 1297 1298 sock_hold(sk); 1299 sock_release(sk->sk_socket); 1300 release_net(sock_net(sk)); 1301 sock_net_set(sk, get_net(&init_net)); 1302 sock_put(sk); 1303} 1304EXPORT_SYMBOL(sk_release_kernel); 1305 1306static void sk_update_clone(const struct sock *sk, struct sock *newsk) 1307{ 1308 if (mem_cgroup_sockets_enabled && sk->sk_cgrp) 1309 sock_update_memcg(newsk); 1310} 1311 1312/** 1313 * sk_clone_lock - clone a socket, and lock its clone 1314 * @sk: the socket to clone 1315 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) 1316 * 1317 * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) 1318 */ 1319struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) 1320{ 1321 struct sock *newsk; 1322 1323 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family); 1324 if (newsk != NULL) { 1325 struct sk_filter *filter; 1326 1327 sock_copy(newsk, sk); 1328 1329 /* SANITY */ 1330 get_net(sock_net(newsk)); 1331 sk_node_init(&newsk->sk_node); 1332 sock_lock_init(newsk); 1333 bh_lock_sock(newsk); 1334 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; 1335 newsk->sk_backlog.len = 0; 1336 1337 atomic_set(&newsk->sk_rmem_alloc, 0); 1338 /* 1339 * sk_wmem_alloc set to one (see sk_free() and sock_wfree()) 1340 */ 1341 atomic_set(&newsk->sk_wmem_alloc, 1); 1342 atomic_set(&newsk->sk_omem_alloc, 0); 1343 skb_queue_head_init(&newsk->sk_receive_queue); 1344 skb_queue_head_init(&newsk->sk_write_queue); 1345#ifdef CONFIG_NET_DMA 1346 skb_queue_head_init(&newsk->sk_async_wait_queue); 1347#endif 1348 1349 spin_lock_init(&newsk->sk_dst_lock); 1350 rwlock_init(&newsk->sk_callback_lock); 1351 lockdep_set_class_and_name(&newsk->sk_callback_lock, 1352 af_callback_keys + newsk->sk_family, 1353 af_family_clock_key_strings[newsk->sk_family]); 1354 1355 newsk->sk_dst_cache = NULL; 1356 newsk->sk_wmem_queued = 0; 1357 newsk->sk_forward_alloc = 0; 1358 newsk->sk_send_head = NULL; 1359 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; 1360 1361 sock_reset_flag(newsk, SOCK_DONE); 1362 skb_queue_head_init(&newsk->sk_error_queue); 1363 1364 filter = rcu_dereference_protected(newsk->sk_filter, 1); 1365 if (filter != NULL) 1366 sk_filter_charge(newsk, filter); 1367 1368 if (unlikely(xfrm_sk_clone_policy(newsk))) { 1369 /* It is still raw copy of parent, so invalidate 1370 * destructor and make plain sk_free() */ 1371 newsk->sk_destruct = NULL; 1372 bh_unlock_sock(newsk); 1373 sk_free(newsk); 1374 newsk = NULL; 1375 goto out; 1376 } 1377 1378 newsk->sk_err = 0; 1379 newsk->sk_priority = 0; 1380 /* 1381 * Before updating sk_refcnt, we must commit prior changes to memory 1382 * (Documentation/RCU/rculist_nulls.txt for details) 1383 */ 1384 smp_wmb(); 1385 atomic_set(&newsk->sk_refcnt, 2); 1386 1387 /* 1388 * Increment the counter in the same struct proto as the master 1389 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that 1390 * is the same as sk->sk_prot->socks, as this field was copied 1391 * with memcpy). 1392 * 1393 * This _changes_ the previous behaviour, where 1394 * tcp_create_openreq_child always was incrementing the 1395 * equivalent to tcp_prot->socks (inet_sock_nr), so this have 1396 * to be taken into account in all callers. -acme 1397 */ 1398 sk_refcnt_debug_inc(newsk); 1399 sk_set_socket(newsk, NULL); 1400 newsk->sk_wq = NULL; 1401 1402 sk_update_clone(sk, newsk); 1403 1404 if (newsk->sk_prot->sockets_allocated) 1405 sk_sockets_allocated_inc(newsk); 1406 1407 if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) 1408 net_enable_timestamp(); 1409 } 1410out: 1411 return newsk; 1412} 1413EXPORT_SYMBOL_GPL(sk_clone_lock); 1414 1415void sk_setup_caps(struct sock *sk, struct dst_entry *dst) 1416{ 1417 __sk_dst_set(sk, dst); 1418 sk->sk_route_caps = dst->dev->features; 1419 if (sk->sk_route_caps & NETIF_F_GSO) 1420 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; 1421 sk->sk_route_caps &= ~sk->sk_route_nocaps; 1422 if (sk_can_gso(sk)) { 1423 if (dst->header_len) { 1424 sk->sk_route_caps &= ~NETIF_F_GSO_MASK; 1425 } else { 1426 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; 1427 sk->sk_gso_max_size = dst->dev->gso_max_size; 1428 } 1429 } 1430} 1431EXPORT_SYMBOL_GPL(sk_setup_caps); 1432 1433void __init sk_init(void) 1434{ 1435 if (totalram_pages <= 4096) { 1436 sysctl_wmem_max = 32767; 1437 sysctl_rmem_max = 32767; 1438 sysctl_wmem_default = 32767; 1439 sysctl_rmem_default = 32767; 1440 } else if (totalram_pages >= 131072) { 1441 sysctl_wmem_max = 131071; 1442 sysctl_rmem_max = 131071; 1443 } 1444} 1445 1446/* 1447 * Simple resource managers for sockets. 1448 */ 1449 1450 1451/* 1452 * Write buffer destructor automatically called from kfree_skb. 1453 */ 1454void sock_wfree(struct sk_buff *skb) 1455{ 1456 struct sock *sk = skb->sk; 1457 unsigned int len = skb->truesize; 1458 1459 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) { 1460 /* 1461 * Keep a reference on sk_wmem_alloc, this will be released 1462 * after sk_write_space() call 1463 */ 1464 atomic_sub(len - 1, &sk->sk_wmem_alloc); 1465 sk->sk_write_space(sk); 1466 len = 1; 1467 } 1468 /* 1469 * if sk_wmem_alloc reaches 0, we must finish what sk_free() 1470 * could not do because of in-flight packets 1471 */ 1472 if (atomic_sub_and_test(len, &sk->sk_wmem_alloc)) 1473 __sk_free(sk); 1474} 1475EXPORT_SYMBOL(sock_wfree); 1476 1477/* 1478 * Read buffer destructor automatically called from kfree_skb. 1479 */ 1480void sock_rfree(struct sk_buff *skb) 1481{ 1482 struct sock *sk = skb->sk; 1483 unsigned int len = skb->truesize; 1484 1485 atomic_sub(len, &sk->sk_rmem_alloc); 1486 sk_mem_uncharge(sk, len); 1487} 1488EXPORT_SYMBOL(sock_rfree); 1489 1490void sock_edemux(struct sk_buff *skb) 1491{ 1492 sock_put(skb->sk); 1493} 1494EXPORT_SYMBOL(sock_edemux); 1495 1496int sock_i_uid(struct sock *sk) 1497{ 1498 int uid; 1499 1500 read_lock_bh(&sk->sk_callback_lock); 1501 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0; 1502 read_unlock_bh(&sk->sk_callback_lock); 1503 return uid; 1504} 1505EXPORT_SYMBOL(sock_i_uid); 1506 1507unsigned long sock_i_ino(struct sock *sk) 1508{ 1509 unsigned long ino; 1510 1511 read_lock_bh(&sk->sk_callback_lock); 1512 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; 1513 read_unlock_bh(&sk->sk_callback_lock); 1514 return ino; 1515} 1516EXPORT_SYMBOL(sock_i_ino); 1517 1518/* 1519 * Allocate a skb from the socket's send buffer. 1520 */ 1521struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, 1522 gfp_t priority) 1523{ 1524 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { 1525 struct sk_buff *skb = alloc_skb(size, priority); 1526 if (skb) { 1527 skb_set_owner_w(skb, sk); 1528 return skb; 1529 } 1530 } 1531 return NULL; 1532} 1533EXPORT_SYMBOL(sock_wmalloc); 1534 1535/* 1536 * Allocate a skb from the socket's receive buffer. 1537 */ 1538struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, 1539 gfp_t priority) 1540{ 1541 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { 1542 struct sk_buff *skb = alloc_skb(size, priority); 1543 if (skb) { 1544 skb_set_owner_r(skb, sk); 1545 return skb; 1546 } 1547 } 1548 return NULL; 1549} 1550 1551/* 1552 * Allocate a memory block from the socket's option memory buffer. 1553 */ 1554void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) 1555{ 1556 if ((unsigned int)size <= sysctl_optmem_max && 1557 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { 1558 void *mem; 1559 /* First do the add, to avoid the race if kmalloc 1560 * might sleep. 1561 */ 1562 atomic_add(size, &sk->sk_omem_alloc); 1563 mem = kmalloc(size, priority); 1564 if (mem) 1565 return mem; 1566 atomic_sub(size, &sk->sk_omem_alloc); 1567 } 1568 return NULL; 1569} 1570EXPORT_SYMBOL(sock_kmalloc); 1571 1572/* 1573 * Free an option memory block. 1574 */ 1575void sock_kfree_s(struct sock *sk, void *mem, int size) 1576{ 1577 kfree(mem); 1578 atomic_sub(size, &sk->sk_omem_alloc); 1579} 1580EXPORT_SYMBOL(sock_kfree_s); 1581 1582/* It is almost wait_for_tcp_memory minus release_sock/lock_sock. 1583 I think, these locks should be removed for datagram sockets. 1584 */ 1585static long sock_wait_for_wmem(struct sock *sk, long timeo) 1586{ 1587 DEFINE_WAIT(wait); 1588 1589 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 1590 for (;;) { 1591 if (!timeo) 1592 break; 1593 if (signal_pending(current)) 1594 break; 1595 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1596 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1597 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) 1598 break; 1599 if (sk->sk_shutdown & SEND_SHUTDOWN) 1600 break; 1601 if (sk->sk_err) 1602 break; 1603 timeo = schedule_timeout(timeo); 1604 } 1605 finish_wait(sk_sleep(sk), &wait); 1606 return timeo; 1607} 1608 1609 1610/* 1611 * Generic send/receive buffer handlers 1612 */ 1613 1614struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, 1615 unsigned long data_len, int noblock, 1616 int *errcode) 1617{ 1618 struct sk_buff *skb; 1619 gfp_t gfp_mask; 1620 long timeo; 1621 int err; 1622 int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; 1623 1624 err = -EMSGSIZE; 1625 if (npages > MAX_SKB_FRAGS) 1626 goto failure; 1627 1628 gfp_mask = sk->sk_allocation; 1629 if (gfp_mask & __GFP_WAIT) 1630 gfp_mask |= __GFP_REPEAT; 1631 1632 timeo = sock_sndtimeo(sk, noblock); 1633 while (1) { 1634 err = sock_error(sk); 1635 if (err != 0) 1636 goto failure; 1637 1638 err = -EPIPE; 1639 if (sk->sk_shutdown & SEND_SHUTDOWN) 1640 goto failure; 1641 1642 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { 1643 skb = alloc_skb(header_len, gfp_mask); 1644 if (skb) { 1645 int i; 1646 1647 /* No pages, we're done... */ 1648 if (!data_len) 1649 break; 1650 1651 skb->truesize += data_len; 1652 skb_shinfo(skb)->nr_frags = npages; 1653 for (i = 0; i < npages; i++) { 1654 struct page *page; 1655 1656 page = alloc_pages(sk->sk_allocation, 0); 1657 if (!page) { 1658 err = -ENOBUFS; 1659 skb_shinfo(skb)->nr_frags = i; 1660 kfree_skb(skb); 1661 goto failure; 1662 } 1663 1664 __skb_fill_page_desc(skb, i, 1665 page, 0, 1666 (data_len >= PAGE_SIZE ? 1667 PAGE_SIZE : 1668 data_len)); 1669 data_len -= PAGE_SIZE; 1670 } 1671 1672 /* Full success... */ 1673 break; 1674 } 1675 err = -ENOBUFS; 1676 goto failure; 1677 } 1678 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 1679 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1680 err = -EAGAIN; 1681 if (!timeo) 1682 goto failure; 1683 if (signal_pending(current)) 1684 goto interrupted; 1685 timeo = sock_wait_for_wmem(sk, timeo); 1686 } 1687 1688 skb_set_owner_w(skb, sk); 1689 return skb; 1690 1691interrupted: 1692 err = sock_intr_errno(timeo); 1693failure: 1694 *errcode = err; 1695 return NULL; 1696} 1697EXPORT_SYMBOL(sock_alloc_send_pskb); 1698 1699struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, 1700 int noblock, int *errcode) 1701{ 1702 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode); 1703} 1704EXPORT_SYMBOL(sock_alloc_send_skb); 1705 1706static void __lock_sock(struct sock *sk) 1707 __releases(&sk->sk_lock.slock) 1708 __acquires(&sk->sk_lock.slock) 1709{ 1710 DEFINE_WAIT(wait); 1711 1712 for (;;) { 1713 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait, 1714 TASK_UNINTERRUPTIBLE); 1715 spin_unlock_bh(&sk->sk_lock.slock); 1716 schedule(); 1717 spin_lock_bh(&sk->sk_lock.slock); 1718 if (!sock_owned_by_user(sk)) 1719 break; 1720 } 1721 finish_wait(&sk->sk_lock.wq, &wait); 1722} 1723 1724static void __release_sock(struct sock *sk) 1725 __releases(&sk->sk_lock.slock) 1726 __acquires(&sk->sk_lock.slock) 1727{ 1728 struct sk_buff *skb = sk->sk_backlog.head; 1729 1730 do { 1731 sk->sk_backlog.head = sk->sk_backlog.tail = NULL; 1732 bh_unlock_sock(sk); 1733 1734 do { 1735 struct sk_buff *next = skb->next; 1736 1737 prefetch(next); 1738 WARN_ON_ONCE(skb_dst_is_noref(skb)); 1739 skb->next = NULL; 1740 sk_backlog_rcv(sk, skb); 1741 1742 /* 1743 * We are in process context here with softirqs 1744 * disabled, use cond_resched_softirq() to preempt. 1745 * This is safe to do because we've taken the backlog 1746 * queue private: 1747 */ 1748 cond_resched_softirq(); 1749 1750 skb = next; 1751 } while (skb != NULL); 1752 1753 bh_lock_sock(sk); 1754 } while ((skb = sk->sk_backlog.head) != NULL); 1755 1756 /* 1757 * Doing the zeroing here guarantee we can not loop forever 1758 * while a wild producer attempts to flood us. 1759 */ 1760 sk->sk_backlog.len = 0; 1761} 1762 1763/** 1764 * sk_wait_data - wait for data to arrive at sk_receive_queue 1765 * @sk: sock to wait on 1766 * @timeo: for how long 1767 * 1768 * Now socket state including sk->sk_err is changed only under lock, 1769 * hence we may omit checks after joining wait queue. 1770 * We check receive queue before schedule() only as optimization; 1771 * it is very likely that release_sock() added new data. 1772 */ 1773int sk_wait_data(struct sock *sk, long *timeo) 1774{ 1775 int rc; 1776 DEFINE_WAIT(wait); 1777 1778 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1779 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1780 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); 1781 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1782 finish_wait(sk_sleep(sk), &wait); 1783 return rc; 1784} 1785EXPORT_SYMBOL(sk_wait_data); 1786 1787/** 1788 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated 1789 * @sk: socket 1790 * @size: memory size to allocate 1791 * @kind: allocation type 1792 * 1793 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means 1794 * rmem allocation. This function assumes that protocols which have 1795 * memory_pressure use sk_wmem_queued as write buffer accounting. 1796 */ 1797int __sk_mem_schedule(struct sock *sk, int size, int kind) 1798{ 1799 struct proto *prot = sk->sk_prot; 1800 int amt = sk_mem_pages(size); 1801 long allocated; 1802 int parent_status = UNDER_LIMIT; 1803 1804 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; 1805 1806 allocated = sk_memory_allocated_add(sk, amt, &parent_status); 1807 1808 /* Under limit. */ 1809 if (parent_status == UNDER_LIMIT && 1810 allocated <= sk_prot_mem_limits(sk, 0)) { 1811 sk_leave_memory_pressure(sk); 1812 return 1; 1813 } 1814 1815 /* Under pressure. (we or our parents) */ 1816 if ((parent_status > SOFT_LIMIT) || 1817 allocated > sk_prot_mem_limits(sk, 1)) 1818 sk_enter_memory_pressure(sk); 1819 1820 /* Over hard limit (we or our parents) */ 1821 if ((parent_status == OVER_LIMIT) || 1822 (allocated > sk_prot_mem_limits(sk, 2))) 1823 goto suppress_allocation; 1824 1825 /* guarantee minimum buffer size under pressure */ 1826 if (kind == SK_MEM_RECV) { 1827 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) 1828 return 1; 1829 1830 } else { /* SK_MEM_SEND */ 1831 if (sk->sk_type == SOCK_STREAM) { 1832 if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) 1833 return 1; 1834 } else if (atomic_read(&sk->sk_wmem_alloc) < 1835 prot->sysctl_wmem[0]) 1836 return 1; 1837 } 1838 1839 if (sk_has_memory_pressure(sk)) { 1840 int alloc; 1841 1842 if (!sk_under_memory_pressure(sk)) 1843 return 1; 1844 alloc = sk_sockets_allocated_read_positive(sk); 1845 if (sk_prot_mem_limits(sk, 2) > alloc * 1846 sk_mem_pages(sk->sk_wmem_queued + 1847 atomic_read(&sk->sk_rmem_alloc) + 1848 sk->sk_forward_alloc)) 1849 return 1; 1850 } 1851 1852suppress_allocation: 1853 1854 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) { 1855 sk_stream_moderate_sndbuf(sk); 1856 1857 /* Fail only if socket is _under_ its sndbuf. 1858 * In this case we cannot block, so that we have to fail. 1859 */ 1860 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) 1861 return 1; 1862 } 1863 1864 trace_sock_exceed_buf_limit(sk, prot, allocated); 1865 1866 /* Alas. Undo changes. */ 1867 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; 1868 1869 sk_memory_allocated_sub(sk, amt); 1870 1871 return 0; 1872} 1873EXPORT_SYMBOL(__sk_mem_schedule); 1874 1875/** 1876 * __sk_reclaim - reclaim memory_allocated 1877 * @sk: socket 1878 */ 1879void __sk_mem_reclaim(struct sock *sk) 1880{ 1881 sk_memory_allocated_sub(sk, 1882 sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT); 1883 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; 1884 1885 if (sk_under_memory_pressure(sk) && 1886 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) 1887 sk_leave_memory_pressure(sk); 1888} 1889EXPORT_SYMBOL(__sk_mem_reclaim); 1890 1891 1892/* 1893 * Set of default routines for initialising struct proto_ops when 1894 * the protocol does not support a particular function. In certain 1895 * cases where it makes no sense for a protocol to have a "do nothing" 1896 * function, some default processing is provided. 1897 */ 1898 1899int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len) 1900{ 1901 return -EOPNOTSUPP; 1902} 1903EXPORT_SYMBOL(sock_no_bind); 1904 1905int sock_no_connect(struct socket *sock, struct sockaddr *saddr, 1906 int len, int flags) 1907{ 1908 return -EOPNOTSUPP; 1909} 1910EXPORT_SYMBOL(sock_no_connect); 1911 1912int sock_no_socketpair(struct socket *sock1, struct socket *sock2) 1913{ 1914 return -EOPNOTSUPP; 1915} 1916EXPORT_SYMBOL(sock_no_socketpair); 1917 1918int sock_no_accept(struct socket *sock, struct socket *newsock, int flags) 1919{ 1920 return -EOPNOTSUPP; 1921} 1922EXPORT_SYMBOL(sock_no_accept); 1923 1924int sock_no_getname(struct socket *sock, struct sockaddr *saddr, 1925 int *len, int peer) 1926{ 1927 return -EOPNOTSUPP; 1928} 1929EXPORT_SYMBOL(sock_no_getname); 1930 1931unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt) 1932{ 1933 return 0; 1934} 1935EXPORT_SYMBOL(sock_no_poll); 1936 1937int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 1938{ 1939 return -EOPNOTSUPP; 1940} 1941EXPORT_SYMBOL(sock_no_ioctl); 1942 1943int sock_no_listen(struct socket *sock, int backlog) 1944{ 1945 return -EOPNOTSUPP; 1946} 1947EXPORT_SYMBOL(sock_no_listen); 1948 1949int sock_no_shutdown(struct socket *sock, int how) 1950{ 1951 return -EOPNOTSUPP; 1952} 1953EXPORT_SYMBOL(sock_no_shutdown); 1954 1955int sock_no_setsockopt(struct socket *sock, int level, int optname, 1956 char __user *optval, unsigned int optlen) 1957{ 1958 return -EOPNOTSUPP; 1959} 1960EXPORT_SYMBOL(sock_no_setsockopt); 1961 1962int sock_no_getsockopt(struct socket *sock, int level, int optname, 1963 char __user *optval, int __user *optlen) 1964{ 1965 return -EOPNOTSUPP; 1966} 1967EXPORT_SYMBOL(sock_no_getsockopt); 1968 1969int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, 1970 size_t len) 1971{ 1972 return -EOPNOTSUPP; 1973} 1974EXPORT_SYMBOL(sock_no_sendmsg); 1975 1976int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, 1977 size_t len, int flags) 1978{ 1979 return -EOPNOTSUPP; 1980} 1981EXPORT_SYMBOL(sock_no_recvmsg); 1982 1983int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) 1984{ 1985 /* Mirror missing mmap method error code */ 1986 return -ENODEV; 1987} 1988EXPORT_SYMBOL(sock_no_mmap); 1989 1990ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) 1991{ 1992 ssize_t res; 1993 struct msghdr msg = {.msg_flags = flags}; 1994 struct kvec iov; 1995 char *kaddr = kmap(page); 1996 iov.iov_base = kaddr + offset; 1997 iov.iov_len = size; 1998 res = kernel_sendmsg(sock, &msg, &iov, 1, size); 1999 kunmap(page); 2000 return res; 2001} 2002EXPORT_SYMBOL(sock_no_sendpage); 2003 2004/* 2005 * Default Socket Callbacks 2006 */ 2007 2008static void sock_def_wakeup(struct sock *sk) 2009{ 2010 struct socket_wq *wq; 2011 2012 rcu_read_lock(); 2013 wq = rcu_dereference(sk->sk_wq); 2014 if (wq_has_sleeper(wq)) 2015 wake_up_interruptible_all(&wq->wait); 2016 rcu_read_unlock(); 2017} 2018 2019static void sock_def_error_report(struct sock *sk) 2020{ 2021 struct socket_wq *wq; 2022 2023 rcu_read_lock(); 2024 wq = rcu_dereference(sk->sk_wq); 2025 if (wq_has_sleeper(wq)) 2026 wake_up_interruptible_poll(&wq->wait, POLLERR); 2027 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); 2028 rcu_read_unlock(); 2029} 2030 2031static void sock_def_readable(struct sock *sk, int len) 2032{ 2033 struct socket_wq *wq; 2034 2035 rcu_read_lock(); 2036 wq = rcu_dereference(sk->sk_wq); 2037 if (wq_has_sleeper(wq)) 2038 wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI | 2039 POLLRDNORM | POLLRDBAND); 2040 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); 2041 rcu_read_unlock(); 2042} 2043 2044static void sock_def_write_space(struct sock *sk) 2045{ 2046 struct socket_wq *wq; 2047 2048 rcu_read_lock(); 2049 2050 /* Do not wake up a writer until he can make "significant" 2051 * progress. --DaveM 2052 */ 2053 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { 2054 wq = rcu_dereference(sk->sk_wq); 2055 if (wq_has_sleeper(wq)) 2056 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | 2057 POLLWRNORM | POLLWRBAND); 2058 2059 /* Should agree with poll, otherwise some programs break */ 2060 if (sock_writeable(sk)) 2061 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 2062 } 2063 2064 rcu_read_unlock(); 2065} 2066 2067static void sock_def_destruct(struct sock *sk) 2068{ 2069 kfree(sk->sk_protinfo); 2070} 2071 2072void sk_send_sigurg(struct sock *sk) 2073{ 2074 if (sk->sk_socket && sk->sk_socket->file) 2075 if (send_sigurg(&sk->sk_socket->file->f_owner)) 2076 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI); 2077} 2078EXPORT_SYMBOL(sk_send_sigurg); 2079 2080void sk_reset_timer(struct sock *sk, struct timer_list* timer, 2081 unsigned long expires) 2082{ 2083 if (!mod_timer(timer, expires)) 2084 sock_hold(sk); 2085} 2086EXPORT_SYMBOL(sk_reset_timer); 2087 2088void sk_stop_timer(struct sock *sk, struct timer_list* timer) 2089{ 2090 if (timer_pending(timer) && del_timer(timer)) 2091 __sock_put(sk); 2092} 2093EXPORT_SYMBOL(sk_stop_timer); 2094 2095void sock_init_data(struct socket *sock, struct sock *sk) 2096{ 2097 skb_queue_head_init(&sk->sk_receive_queue); 2098 skb_queue_head_init(&sk->sk_write_queue); 2099 skb_queue_head_init(&sk->sk_error_queue); 2100#ifdef CONFIG_NET_DMA 2101 skb_queue_head_init(&sk->sk_async_wait_queue); 2102#endif 2103 2104 sk->sk_send_head = NULL; 2105 2106 init_timer(&sk->sk_timer); 2107 2108 sk->sk_allocation = GFP_KERNEL; 2109 sk->sk_rcvbuf = sysctl_rmem_default; 2110 sk->sk_sndbuf = sysctl_wmem_default; 2111 sk->sk_state = TCP_CLOSE; 2112 sk_set_socket(sk, sock); 2113 2114 sock_set_flag(sk, SOCK_ZAPPED); 2115 2116 if (sock) { 2117 sk->sk_type = sock->type; 2118 sk->sk_wq = sock->wq; 2119 sock->sk = sk; 2120 } else 2121 sk->sk_wq = NULL; 2122 2123 spin_lock_init(&sk->sk_dst_lock); 2124 rwlock_init(&sk->sk_callback_lock); 2125 lockdep_set_class_and_name(&sk->sk_callback_lock, 2126 af_callback_keys + sk->sk_family, 2127 af_family_clock_key_strings[sk->sk_family]); 2128 2129 sk->sk_state_change = sock_def_wakeup; 2130 sk->sk_data_ready = sock_def_readable; 2131 sk->sk_write_space = sock_def_write_space; 2132 sk->sk_error_report = sock_def_error_report; 2133 sk->sk_destruct = sock_def_destruct; 2134 2135 sk->sk_sndmsg_page = NULL; 2136 sk->sk_sndmsg_off = 0; 2137 sk->sk_peek_off = -1; 2138 2139 sk->sk_peer_pid = NULL; 2140 sk->sk_peer_cred = NULL; 2141 sk->sk_write_pending = 0; 2142 sk->sk_rcvlowat = 1; 2143 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 2144 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; 2145 2146 sk->sk_stamp = ktime_set(-1L, 0); 2147 2148 /* 2149 * Before updating sk_refcnt, we must commit prior changes to memory 2150 * (Documentation/RCU/rculist_nulls.txt for details) 2151 */ 2152 smp_wmb(); 2153 atomic_set(&sk->sk_refcnt, 1); 2154 atomic_set(&sk->sk_drops, 0); 2155} 2156EXPORT_SYMBOL(sock_init_data); 2157 2158void lock_sock_nested(struct sock *sk, int subclass) 2159{ 2160 might_sleep(); 2161 spin_lock_bh(&sk->sk_lock.slock); 2162 if (sk->sk_lock.owned) 2163 __lock_sock(sk); 2164 sk->sk_lock.owned = 1; 2165 spin_unlock(&sk->sk_lock.slock); 2166 /* 2167 * The sk_lock has mutex_lock() semantics here: 2168 */ 2169 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); 2170 local_bh_enable(); 2171} 2172EXPORT_SYMBOL(lock_sock_nested); 2173 2174void release_sock(struct sock *sk) 2175{ 2176 /* 2177 * The sk_lock has mutex_unlock() semantics: 2178 */ 2179 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); 2180 2181 spin_lock_bh(&sk->sk_lock.slock); 2182 if (sk->sk_backlog.tail) 2183 __release_sock(sk); 2184 2185 if (sk->sk_prot->release_cb) 2186 sk->sk_prot->release_cb(sk); 2187 2188 sk->sk_lock.owned = 0; 2189 if (waitqueue_active(&sk->sk_lock.wq)) 2190 wake_up(&sk->sk_lock.wq); 2191 spin_unlock_bh(&sk->sk_lock.slock); 2192} 2193EXPORT_SYMBOL(release_sock); 2194 2195/** 2196 * lock_sock_fast - fast version of lock_sock 2197 * @sk: socket 2198 * 2199 * This version should be used for very small section, where process wont block 2200 * return false if fast path is taken 2201 * sk_lock.slock locked, owned = 0, BH disabled 2202 * return true if slow path is taken 2203 * sk_lock.slock unlocked, owned = 1, BH enabled 2204 */ 2205bool lock_sock_fast(struct sock *sk) 2206{ 2207 might_sleep(); 2208 spin_lock_bh(&sk->sk_lock.slock); 2209 2210 if (!sk->sk_lock.owned) 2211 /* 2212 * Note : We must disable BH 2213 */ 2214 return false; 2215 2216 __lock_sock(sk); 2217 sk->sk_lock.owned = 1; 2218 spin_unlock(&sk->sk_lock.slock); 2219 /* 2220 * The sk_lock has mutex_lock() semantics here: 2221 */ 2222 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); 2223 local_bh_enable(); 2224 return true; 2225} 2226EXPORT_SYMBOL(lock_sock_fast); 2227 2228int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) 2229{ 2230 struct timeval tv; 2231 if (!sock_flag(sk, SOCK_TIMESTAMP)) 2232 sock_enable_timestamp(sk, SOCK_TIMESTAMP); 2233 tv = ktime_to_timeval(sk->sk_stamp); 2234 if (tv.tv_sec == -1) 2235 return -ENOENT; 2236 if (tv.tv_sec == 0) { 2237 sk->sk_stamp = ktime_get_real(); 2238 tv = ktime_to_timeval(sk->sk_stamp); 2239 } 2240 return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0; 2241} 2242EXPORT_SYMBOL(sock_get_timestamp); 2243 2244int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) 2245{ 2246 struct timespec ts; 2247 if (!sock_flag(sk, SOCK_TIMESTAMP)) 2248 sock_enable_timestamp(sk, SOCK_TIMESTAMP); 2249 ts = ktime_to_timespec(sk->sk_stamp); 2250 if (ts.tv_sec == -1) 2251 return -ENOENT; 2252 if (ts.tv_sec == 0) { 2253 sk->sk_stamp = ktime_get_real(); 2254 ts = ktime_to_timespec(sk->sk_stamp); 2255 } 2256 return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0; 2257} 2258EXPORT_SYMBOL(sock_get_timestampns); 2259 2260void sock_enable_timestamp(struct sock *sk, int flag) 2261{ 2262 if (!sock_flag(sk, flag)) { 2263 unsigned long previous_flags = sk->sk_flags; 2264 2265 sock_set_flag(sk, flag); 2266 /* 2267 * we just set one of the two flags which require net 2268 * time stamping, but time stamping might have been on 2269 * already because of the other one 2270 */ 2271 if (!(previous_flags & SK_FLAGS_TIMESTAMP)) 2272 net_enable_timestamp(); 2273 } 2274} 2275 2276/* 2277 * Get a socket option on an socket. 2278 * 2279 * FIX: POSIX 1003.1g is very ambiguous here. It states that 2280 * asynchronous errors should be reported by getsockopt. We assume 2281 * this means if you specify SO_ERROR (otherwise whats the point of it). 2282 */ 2283int sock_common_getsockopt(struct socket *sock, int level, int optname, 2284 char __user *optval, int __user *optlen) 2285{ 2286 struct sock *sk = sock->sk; 2287 2288 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); 2289} 2290EXPORT_SYMBOL(sock_common_getsockopt); 2291 2292#ifdef CONFIG_COMPAT 2293int compat_sock_common_getsockopt(struct socket *sock, int level, int optname, 2294 char __user *optval, int __user *optlen) 2295{ 2296 struct sock *sk = sock->sk; 2297 2298 if (sk->sk_prot->compat_getsockopt != NULL) 2299 return sk->sk_prot->compat_getsockopt(sk, level, optname, 2300 optval, optlen); 2301 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); 2302} 2303EXPORT_SYMBOL(compat_sock_common_getsockopt); 2304#endif 2305 2306int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, 2307 struct msghdr *msg, size_t size, int flags) 2308{ 2309 struct sock *sk = sock->sk; 2310 int addr_len = 0; 2311 int err; 2312 2313 err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, 2314 flags & ~MSG_DONTWAIT, &addr_len); 2315 if (err >= 0) 2316 msg->msg_namelen = addr_len; 2317 return err; 2318} 2319EXPORT_SYMBOL(sock_common_recvmsg); 2320 2321/* 2322 * Set socket options on an inet socket. 2323 */ 2324int sock_common_setsockopt(struct socket *sock, int level, int optname, 2325 char __user *optval, unsigned int optlen) 2326{ 2327 struct sock *sk = sock->sk; 2328 2329 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); 2330} 2331EXPORT_SYMBOL(sock_common_setsockopt); 2332 2333#ifdef CONFIG_COMPAT 2334int compat_sock_common_setsockopt(struct socket *sock, int level, int optname, 2335 char __user *optval, unsigned int optlen) 2336{ 2337 struct sock *sk = sock->sk; 2338 2339 if (sk->sk_prot->compat_setsockopt != NULL) 2340 return sk->sk_prot->compat_setsockopt(sk, level, optname, 2341 optval, optlen); 2342 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); 2343} 2344EXPORT_SYMBOL(compat_sock_common_setsockopt); 2345#endif 2346 2347void sk_common_release(struct sock *sk) 2348{ 2349 if (sk->sk_prot->destroy) 2350 sk->sk_prot->destroy(sk); 2351 2352 /* 2353 * Observation: when sock_common_release is called, processes have 2354 * no access to socket. But net still has. 2355 * Step one, detach it from networking: 2356 * 2357 * A. Remove from hash tables. 2358 */ 2359 2360 sk->sk_prot->unhash(sk); 2361 2362 /* 2363 * In this point socket cannot receive new packets, but it is possible 2364 * that some packets are in flight because some CPU runs receiver and 2365 * did hash table lookup before we unhashed socket. They will achieve 2366 * receive queue and will be purged by socket destructor. 2367 * 2368 * Also we still have packets pending on receive queue and probably, 2369 * our own packets waiting in device queues. sock_destroy will drain 2370 * receive queue, but transmitted packets will delay socket destruction 2371 * until the last reference will be released. 2372 */ 2373 2374 sock_orphan(sk); 2375 2376 xfrm_sk_free_policy(sk); 2377 2378 sk_refcnt_debug_release(sk); 2379 sock_put(sk); 2380} 2381EXPORT_SYMBOL(sk_common_release); 2382 2383#ifdef CONFIG_PROC_FS 2384#define PROTO_INUSE_NR 64 /* should be enough for the first time */ 2385struct prot_inuse { 2386 int val[PROTO_INUSE_NR]; 2387}; 2388 2389static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); 2390 2391#ifdef CONFIG_NET_NS 2392void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) 2393{ 2394 __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val); 2395} 2396EXPORT_SYMBOL_GPL(sock_prot_inuse_add); 2397 2398int sock_prot_inuse_get(struct net *net, struct proto *prot) 2399{ 2400 int cpu, idx = prot->inuse_idx; 2401 int res = 0; 2402 2403 for_each_possible_cpu(cpu) 2404 res += per_cpu_ptr(net->core.inuse, cpu)->val[idx]; 2405 2406 return res >= 0 ? res : 0; 2407} 2408EXPORT_SYMBOL_GPL(sock_prot_inuse_get); 2409 2410static int __net_init sock_inuse_init_net(struct net *net) 2411{ 2412 net->core.inuse = alloc_percpu(struct prot_inuse); 2413 return net->core.inuse ? 0 : -ENOMEM; 2414} 2415 2416static void __net_exit sock_inuse_exit_net(struct net *net) 2417{ 2418 free_percpu(net->core.inuse); 2419} 2420 2421static struct pernet_operations net_inuse_ops = { 2422 .init = sock_inuse_init_net, 2423 .exit = sock_inuse_exit_net, 2424}; 2425 2426static __init int net_inuse_init(void) 2427{ 2428 if (register_pernet_subsys(&net_inuse_ops)) 2429 panic("Cannot initialize net inuse counters"); 2430 2431 return 0; 2432} 2433 2434core_initcall(net_inuse_init); 2435#else 2436static DEFINE_PER_CPU(struct prot_inuse, prot_inuse); 2437 2438void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) 2439{ 2440 __this_cpu_add(prot_inuse.val[prot->inuse_idx], val); 2441} 2442EXPORT_SYMBOL_GPL(sock_prot_inuse_add); 2443 2444int sock_prot_inuse_get(struct net *net, struct proto *prot) 2445{ 2446 int cpu, idx = prot->inuse_idx; 2447 int res = 0; 2448 2449 for_each_possible_cpu(cpu) 2450 res += per_cpu(prot_inuse, cpu).val[idx]; 2451 2452 return res >= 0 ? res : 0; 2453} 2454EXPORT_SYMBOL_GPL(sock_prot_inuse_get); 2455#endif 2456 2457static void assign_proto_idx(struct proto *prot) 2458{ 2459 prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); 2460 2461 if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { 2462 pr_err("PROTO_INUSE_NR exhausted\n"); 2463 return; 2464 } 2465 2466 set_bit(prot->inuse_idx, proto_inuse_idx); 2467} 2468 2469static void release_proto_idx(struct proto *prot) 2470{ 2471 if (prot->inuse_idx != PROTO_INUSE_NR - 1) 2472 clear_bit(prot->inuse_idx, proto_inuse_idx); 2473} 2474#else 2475static inline void assign_proto_idx(struct proto *prot) 2476{ 2477} 2478 2479static inline void release_proto_idx(struct proto *prot) 2480{ 2481} 2482#endif 2483 2484int proto_register(struct proto *prot, int alloc_slab) 2485{ 2486 if (alloc_slab) { 2487 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, 2488 SLAB_HWCACHE_ALIGN | prot->slab_flags, 2489 NULL); 2490 2491 if (prot->slab == NULL) { 2492 pr_crit("%s: Can't create sock SLAB cache!\n", 2493 prot->name); 2494 goto out; 2495 } 2496 2497 if (prot->rsk_prot != NULL) { 2498 prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name); 2499 if (prot->rsk_prot->slab_name == NULL) 2500 goto out_free_sock_slab; 2501 2502 prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name, 2503 prot->rsk_prot->obj_size, 0, 2504 SLAB_HWCACHE_ALIGN, NULL); 2505 2506 if (prot->rsk_prot->slab == NULL) { 2507 pr_crit("%s: Can't create request sock SLAB cache!\n", 2508 prot->name); 2509 goto out_free_request_sock_slab_name; 2510 } 2511 } 2512 2513 if (prot->twsk_prot != NULL) { 2514 prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name); 2515 2516 if (prot->twsk_prot->twsk_slab_name == NULL) 2517 goto out_free_request_sock_slab; 2518 2519 prot->twsk_prot->twsk_slab = 2520 kmem_cache_create(prot->twsk_prot->twsk_slab_name, 2521 prot->twsk_prot->twsk_obj_size, 2522 0, 2523 SLAB_HWCACHE_ALIGN | 2524 prot->slab_flags, 2525 NULL); 2526 if (prot->twsk_prot->twsk_slab == NULL) 2527 goto out_free_timewait_sock_slab_name; 2528 } 2529 } 2530 2531 mutex_lock(&proto_list_mutex); 2532 list_add(&prot->node, &proto_list); 2533 assign_proto_idx(prot); 2534 mutex_unlock(&proto_list_mutex); 2535 return 0; 2536 2537out_free_timewait_sock_slab_name: 2538 kfree(prot->twsk_prot->twsk_slab_name); 2539out_free_request_sock_slab: 2540 if (prot->rsk_prot && prot->rsk_prot->slab) { 2541 kmem_cache_destroy(prot->rsk_prot->slab); 2542 prot->rsk_prot->slab = NULL; 2543 } 2544out_free_request_sock_slab_name: 2545 if (prot->rsk_prot) 2546 kfree(prot->rsk_prot->slab_name); 2547out_free_sock_slab: 2548 kmem_cache_destroy(prot->slab); 2549 prot->slab = NULL; 2550out: 2551 return -ENOBUFS; 2552} 2553EXPORT_SYMBOL(proto_register); 2554 2555void proto_unregister(struct proto *prot) 2556{ 2557 mutex_lock(&proto_list_mutex); 2558 release_proto_idx(prot); 2559 list_del(&prot->node); 2560 mutex_unlock(&proto_list_mutex); 2561 2562 if (prot->slab != NULL) { 2563 kmem_cache_destroy(prot->slab); 2564 prot->slab = NULL; 2565 } 2566 2567 if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) { 2568 kmem_cache_destroy(prot->rsk_prot->slab); 2569 kfree(prot->rsk_prot->slab_name); 2570 prot->rsk_prot->slab = NULL; 2571 } 2572 2573 if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { 2574 kmem_cache_destroy(prot->twsk_prot->twsk_slab); 2575 kfree(prot->twsk_prot->twsk_slab_name); 2576 prot->twsk_prot->twsk_slab = NULL; 2577 } 2578} 2579EXPORT_SYMBOL(proto_unregister); 2580 2581#ifdef CONFIG_PROC_FS 2582static void *proto_seq_start(struct seq_file *seq, loff_t *pos) 2583 __acquires(proto_list_mutex) 2584{ 2585 mutex_lock(&proto_list_mutex); 2586 return seq_list_start_head(&proto_list, *pos); 2587} 2588 2589static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2590{ 2591 return seq_list_next(v, &proto_list, pos); 2592} 2593 2594static void proto_seq_stop(struct seq_file *seq, void *v) 2595 __releases(proto_list_mutex) 2596{ 2597 mutex_unlock(&proto_list_mutex); 2598} 2599 2600static char proto_method_implemented(const void *method) 2601{ 2602 return method == NULL ? 'n' : 'y'; 2603} 2604static long sock_prot_memory_allocated(struct proto *proto) 2605{ 2606 return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L; 2607} 2608 2609static char *sock_prot_memory_pressure(struct proto *proto) 2610{ 2611 return proto->memory_pressure != NULL ? 2612 proto_memory_pressure(proto) ? "yes" : "no" : "NI"; 2613} 2614 2615static void proto_seq_printf(struct seq_file *seq, struct proto *proto) 2616{ 2617 2618 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s " 2619 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", 2620 proto->name, 2621 proto->obj_size, 2622 sock_prot_inuse_get(seq_file_net(seq), proto), 2623 sock_prot_memory_allocated(proto), 2624 sock_prot_memory_pressure(proto), 2625 proto->max_header, 2626 proto->slab == NULL ? "no" : "yes", 2627 module_name(proto->owner), 2628 proto_method_implemented(proto->close), 2629 proto_method_implemented(proto->connect), 2630 proto_method_implemented(proto->disconnect), 2631 proto_method_implemented(proto->accept), 2632 proto_method_implemented(proto->ioctl), 2633 proto_method_implemented(proto->init), 2634 proto_method_implemented(proto->destroy), 2635 proto_method_implemented(proto->shutdown), 2636 proto_method_implemented(proto->setsockopt), 2637 proto_method_implemented(proto->getsockopt), 2638 proto_method_implemented(proto->sendmsg), 2639 proto_method_implemented(proto->recvmsg), 2640 proto_method_implemented(proto->sendpage), 2641 proto_method_implemented(proto->bind), 2642 proto_method_implemented(proto->backlog_rcv), 2643 proto_method_implemented(proto->hash), 2644 proto_method_implemented(proto->unhash), 2645 proto_method_implemented(proto->get_port), 2646 proto_method_implemented(proto->enter_memory_pressure)); 2647} 2648 2649static int proto_seq_show(struct seq_file *seq, void *v) 2650{ 2651 if (v == &proto_list) 2652 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s", 2653 "protocol", 2654 "size", 2655 "sockets", 2656 "memory", 2657 "press", 2658 "maxhdr", 2659 "slab", 2660 "module", 2661 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"); 2662 else 2663 proto_seq_printf(seq, list_entry(v, struct proto, node)); 2664 return 0; 2665} 2666 2667static const struct seq_operations proto_seq_ops = { 2668 .start = proto_seq_start, 2669 .next = proto_seq_next, 2670 .stop = proto_seq_stop, 2671 .show = proto_seq_show, 2672}; 2673 2674static int proto_seq_open(struct inode *inode, struct file *file) 2675{ 2676 return seq_open_net(inode, file, &proto_seq_ops, 2677 sizeof(struct seq_net_private)); 2678} 2679 2680static const struct file_operations proto_seq_fops = { 2681 .owner = THIS_MODULE, 2682 .open = proto_seq_open, 2683 .read = seq_read, 2684 .llseek = seq_lseek, 2685 .release = seq_release_net, 2686}; 2687 2688static __net_init int proto_init_net(struct net *net) 2689{ 2690 if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops)) 2691 return -ENOMEM; 2692 2693 return 0; 2694} 2695 2696static __net_exit void proto_exit_net(struct net *net) 2697{ 2698 proc_net_remove(net, "protocols"); 2699} 2700 2701 2702static __net_initdata struct pernet_operations proto_net_ops = { 2703 .init = proto_init_net, 2704 .exit = proto_exit_net, 2705}; 2706 2707static int __init proto_init(void) 2708{ 2709 return register_pernet_subsys(&proto_net_ops); 2710} 2711 2712subsys_initcall(proto_init); 2713 2714#endif /* PROC_FS */ 2715