sock.c revision 8a8e04df4747661daaee77e98e102d99c9e09b98
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *		Pauline Middelink	:	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	:	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo	:	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>
#include <linux/static_key.h>
#include <linux/memcontrol.h>
#include <linux/prefetch.h>

#include <asm/uaccess.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>

#include <linux/filter.h>

#include <trace/events/sock.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);

#ifdef CONFIG_MEMCG_KMEM
int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
	struct proto *proto;
	int ret = 0;

	mutex_lock(&proto_list_mutex);
	list_for_each_entry(proto, &proto_list, node) {
		if (proto->init_cgroup) {
			ret = proto->init_cgroup(memcg, ss);
			if (ret)
				goto out;
		}
	}

	mutex_unlock(&proto_list_mutex);
	return ret;
out:
	list_for_each_entry_continue_reverse(proto, &proto_list, node)
		if (proto->destroy_cgroup)
			proto->destroy_cgroup(memcg);
	mutex_unlock(&proto_list_mutex);
	return ret;
}

void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
{
	struct proto *proto;

	mutex_lock(&proto_list_mutex);
	list_for_each_entry_reverse(proto, &proto_list, node)
		if (proto->destroy_cgroup)
			proto->destroy_cgroup(memcg);
	mutex_unlock(&proto_list_mutex);
}
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

struct static_key memcg_socket_limit_enabled;
EXPORT_SYMBOL(memcg_socket_limit_enabled);

/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *const af_family_key_strings[AF_MAX+1] = {
	"sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
	"sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
	"sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
	"sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
	"sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
	"sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
	"sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
	"sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
	"sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
	"sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
	"sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV"     ,
	"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
	"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
	"sk_lock-AF_NFC"   , "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
	"slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
	"slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
	"slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
	"slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
	"slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
	"slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
	"slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
	"slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
	"slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
	"slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
	"slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
	"slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
	"slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
	"slock-AF_NFC"   , "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
	"clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
	"clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
	"clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
	"clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
	"clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
	"clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
	"clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
	"clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
	"clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
	"clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
	"clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
	"clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
	"clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
	"clock-AF_NFC"   , "clock-AF_MAX"
};

/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms. This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);

struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
EXPORT_SYMBOL_GPL(memalloc_socks);

/**
 * sk_set_memalloc - sets %SOCK_MEMALLOC
 * @sk: socket to set it on
 *
 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
 * It's the responsibility of the admin to adjust min_free_kbytes
 * to meet the requirements
 */
void sk_set_memalloc(struct sock *sk)
{
	sock_set_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation |= __GFP_MEMALLOC;
	static_key_slow_inc(&memalloc_socks);
}
EXPORT_SYMBOL_GPL(sk_set_memalloc);
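
/*
 * Illustrative sketch (not part of this file): a subsystem that does
 * block I/O over the network (swap-over-NFS style) might mark its
 * kernel socket %SOCK_MEMALLOC so that writeback under memory pressure
 * can still make progress. example_mark_swap_socket() is hypothetical;
 * only sk_set_memalloc()/sk_clear_memalloc() are real.
 */
#if 0
static void example_mark_swap_socket(struct socket *sock)
{
	struct sock *sk = sock->sk;

	/* dip into reserves; the admin must raise min_free_kbytes */
	sk_set_memalloc(sk);
}

static void example_unmark_swap_socket(struct socket *sock)
{
	/* on teardown, before the socket is released */
	sk_clear_memalloc(sock->sk);
}
#endif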

void sk_clear_memalloc(struct sock *sk)
{
	sock_reset_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation &= ~__GFP_MEMALLOC;
	static_key_slow_dec(&memalloc_socks);

	/*
	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
	 * progress of swapping. However, if SOCK_MEMALLOC is cleared while
	 * it has rmem allocations there is a risk that the user of the
	 * socket cannot make forward progress due to exceeding the rmem
	 * limits. By rights, sk_clear_memalloc() should only be called
	 * on sockets being torn down but warn and reset the accounting if
	 * that assumption breaks.
	 */
	if (WARN_ON(sk->sk_forward_alloc))
		sk_mem_reclaim(sk);
}
EXPORT_SYMBOL_GPL(sk_clear_memalloc);

int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	unsigned long pflags = current->flags;

	/* these should have been dropped before queueing */
	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));

	current->flags |= PF_MEMALLOC;
	ret = sk->sk_backlog_rcv(sk, skb);
	tsk_restore_flags(current, pflags, PF_MEMALLOC);

	return ret;
}
EXPORT_SYMBOL(__sk_backlog_rcv);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;
	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
		return -EDOM;

	if (tv.tv_sec < 0) {
		static int warned __read_mostly;

		*timeo_p = 0;
		if (warned < 10 && net_ratelimit()) {
			warned++;
			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
				__func__, current->comm, task_pid_nr(current));
		}
		return 0;
	}
	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}
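
/*
 * Illustrative userspace sketch (not part of this file): how the
 * timeout parsed by sock_set_timeout() above is normally supplied.
 * A tv_usec outside [0, USEC_PER_SEC) yields EDOM, a negative tv_sec
 * is treated as "do not wait" rather than an error, and an all-zero
 * timeval means "wait forever" (MAX_SCHEDULE_TIMEOUT).
 */
#if 0
#include <sys/socket.h>
#include <sys/time.h>

static int example_set_rcv_timeout(int fd)
{
	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };

	/* subsequent blocking reads give up after roughly five seconds */
	return setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
}
#endif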

static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
			warncomm, name);
		warned++;
	}
}

#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))

static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
	if (sk->sk_flags & flags) {
		sk->sk_flags &= ~flags;
		if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
			net_disable_timestamp();
	}
}


int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err;
	int skb_len;
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
		atomic_inc(&sk->sk_drops);
		trace_sock_rcvqueue_full(sk, skb);
		return -ENOMEM;
	}

	err = sk_filter(sk, skb);
	if (err)
		return err;

	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* Cache the SKB length before we tack it onto the receive
	 * queue. Once it is added it no longer belongs to us and
	 * may be freed by other threads of control pulling packets
	 * from the queue.
	 */
	skb_len = skb->len;

	/* we escape from the rcu protected region, make sure we don't leak
	 * a non-refcounted dst
	 */
	skb_dst_force(skb);

	spin_lock_irqsave(&list->lock, flags);
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk, skb_len);
	return 0;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);

int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}
	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}

	bh_unlock_sock(sk);
out:
	sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

void sk_reset_txq(struct sock *sk)
{
	sk_tx_queue_clear(sk);
}
EXPORT_SYMBOL(sk_reset_txq);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = __sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_tx_queue_clear(sk);
		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_dst_reset(sk);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(sk_dst_check);
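
/*
 * Illustrative sketch (not part of this file): a typical transmit path
 * revalidates its cached route with sk_dst_check() and rebuilds it when
 * the check fails. example_route_output() stands in for a protocol
 * specific routing call and is hypothetical.
 */
#if 0
static struct dst_entry *example_tx_route(struct sock *sk)
{
	struct dst_entry *dst = sk_dst_check(sk, 0);

	if (!dst) {
		dst = example_route_output(sk);	/* hypothetical helper */
		if (!IS_ERR(dst))
			sk_setup_caps(sk, dst);	/* cache it on the socket again */
	}
	return dst;
}
#endif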

static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];
	int index;

	/* Sorry... */
	ret = -EPERM;
	if (!capable(CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
	if (optlen < 0)
		goto out;

	/* Bind this socket to a particular device like "eth0",
	 * as specified in the passed interface name. If the
	 * name is "" or the option length is zero the socket
	 * is not bound.
	 */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	memset(devname, 0, sizeof(devname));

	ret = -EFAULT;
	if (copy_from_user(devname, optval, optlen))
		goto out;

	index = 0;
	if (devname[0] != '\0') {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_name_rcu(net, devname);
		if (dev)
			index = dev->ifindex;
		rcu_read_unlock();
		ret = -ENODEV;
		if (!dev)
			goto out;
	}

	lock_sock(sk);
	sk->sk_bound_dev_if = index;
	sk_dst_reset(sk);
	release_sock(sk);

	ret = 0;

out:
#endif

	return ret;
}

static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
	if (valbool)
		sock_set_flag(sk, bit);
	else
		sock_reset_flag(sk, bit);
}
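
/*
 * Illustrative userspace sketch (not part of this file): binding a
 * socket to one interface via the option handled by sock_bindtodevice()
 * above. Passing an empty name (or a zero length) unbinds the socket;
 * the caller needs CAP_NET_RAW.
 */
#if 0
#include <string.h>
#include <sys/socket.h>

static int example_bind_to_eth0(int fd)
{
	const char ifname[] = "eth0";

	return setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
			  ifname, strlen(ifname));
}
#endif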

/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

	if (optname == SO_BINDTODEVICE)
		return sock_bindtodevice(sk, optval, optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
	case SO_DEBUG:
		if (val && !capable(CAP_NET_ADMIN))
			ret = -EACCES;
		else
			sock_valbool_flag(sk, SOCK_DBG, valbool);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
		break;
	case SO_TYPE:
	case SO_PROTOCOL:
	case SO_DOMAIN:
	case SO_ERROR:
		ret = -ENOPROTOOPT;
		break;
	case SO_DONTROUTE:
		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't error on this; BSD doesn't, and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints.
		 */
		val = min_t(u32, val, sysctl_wmem_max);
set_sndbuf:
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF);
		/* Wake up sending tasks if we upped the value. */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Don't error on this; BSD doesn't, and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints.
		 */
		val = min_t(u32, val, sysctl_rmem_max);
set_rcvbuf:
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/*
		 * We double it on the way in to account for
		 * "struct sk_buff" etc. overhead. Applications
		 * assume that the SO_RCVBUF setting they make will
		 * allow that much actual data to be received on that
		 * socket.
		 *
		 * Applications are unaware that "struct sk_buff" and
		 * other overheads allocate from the receive buffer
		 * during socket buffer allocation.
		 *
		 * And after considering the possible alternatives,
		 * returning the value we actually used in getsockopt
		 * is the most desirable behavior.
		 */
		sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF);
		break;

	case SO_RCVBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_rcvbuf;

	case SO_KEEPALIVE:
#ifdef CONFIG_INET
		if (sk->sk_protocol == IPPROTO_TCP)
			tcp_set_keepalive(sk, valbool);
#endif
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check = valbool;
		break;

	case SO_PRIORITY:
		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
			sk->sk_priority = val;
		else
			ret = -EPERM;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;	/* 1003.1g */
			break;
		}
		if (copy_from_user(&ling, optval, sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff)
			sock_reset_flag(sk, SOCK_LINGER);
		else {
#if (BITS_PER_LONG == 32)
			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
			else
#endif
				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("setsockopt");
		break;

	case SO_PASSCRED:
		if (valbool)
			set_bit(SOCK_PASSCRED, &sock->flags);
		else
			clear_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_TIMESTAMP:
	case SO_TIMESTAMPNS:
		if (valbool) {
			if (optname == SO_TIMESTAMP)
				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
			else
				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
			sock_set_flag(sk, SOCK_RCVTSTAMP);
			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
		} else {
			sock_reset_flag(sk, SOCK_RCVTSTAMP);
			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		}
		break;

	case SO_TIMESTAMPING:
		if (val & ~SOF_TIMESTAMPING_MASK) {
			ret = -EINVAL;
			break;
		}
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
				  val & SOF_TIMESTAMPING_TX_HARDWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
				  val & SOF_TIMESTAMPING_TX_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
				  val & SOF_TIMESTAMPING_RX_HARDWARE);
		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
			sock_enable_timestamp(sk,
					      SOCK_TIMESTAMPING_RX_SOFTWARE);
		else
			sock_disable_timestamp(sk,
					       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
				  val & SOF_TIMESTAMPING_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
				  val & SOF_TIMESTAMPING_SYS_HARDWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
				  val & SOF_TIMESTAMPING_RAW_HARDWARE);
		break;

	case SO_RCVLOWAT:
		if (val < 0)
			val = INT_MAX;
		sk->sk_rcvlowat = val ? : 1;
		break;

	case SO_RCVTIMEO:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
		break;

	case SO_SNDTIMEO:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
		break;

	case SO_ATTACH_FILTER:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_attach_filter(&fprog, sk);
		}
		break;

	case SO_DETACH_FILTER:
		ret = sk_detach_filter(sk);
		break;

	case SO_PASSSEC:
		if (valbool)
			set_bit(SOCK_PASSSEC, &sock->flags);
		else
			clear_bit(SOCK_PASSSEC, &sock->flags);
		break;
	case SO_MARK:
		if (!capable(CAP_NET_ADMIN))
			ret = -EPERM;
		else
			sk->sk_mark = val;
		break;

		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
	case SO_RXQ_OVFL:
		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
		break;

	case SO_WIFI_STATUS:
		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
		break;

	case SO_PEEK_OFF:
		if (sock->ops->set_peek_off)
			sock->ops->set_peek_off(sk, val);
		else
			ret = -EOPNOTSUPP;
		break;

	case SO_NOFCS:
		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
		break;

	default:
		ret = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	return ret;
}
EXPORT_SYMBOL(sock_setsockopt);
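
/*
 * Illustrative userspace sketch (not part of this file): the doubling
 * done for SO_RCVBUF/SO_SNDBUF above is visible to applications;
 * reading the option back returns twice the requested value (capped by
 * sysctl_rmem_max and floored at SOCK_MIN_RCVBUF).
 */
#if 0
#include <stdio.h>
#include <sys/socket.h>

static void example_show_rcvbuf_doubling(int fd)
{
	int val = 65536, out = 0;
	socklen_t len = sizeof(out);

	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &out, &len);
	printf("asked for %d, kernel reserved %d\n", val, out);	/* out == 131072 */
}
#endif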


void cred_to_ucred(struct pid *pid, const struct cred *cred,
		   struct ucred *ucred)
{
	ucred->pid = pid_vnr(pid);
	ucred->uid = ucred->gid = -1;
	if (cred) {
		struct user_namespace *current_ns = current_user_ns();

		ucred->uid = from_kuid(current_ns, cred->euid);
		ucred->gid = from_kgid(current_ns, cred->egid);
	}
}
EXPORT_SYMBOL_GPL(cred_to_ucred);

int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	memset(&v, 0, sizeof(v));

	switch (optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = sk->sk_sndbuf;
		break;

	case SO_RCVBUF:
		v.val = sk->sk_rcvbuf;
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_KEEPALIVE:
		v.val = sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val = sk->sk_type;
		break;

	case SO_PROTOCOL:
		v.val = sk->sk_protocol;
		break;

	case SO_DOMAIN:
		v.val = sk->sk_family;
		break;

	case SO_ERROR:
		v.val = -sock_error(sk);
		if (v.val == 0)
			v.val = xchg(&sk->sk_err_soft, 0);
		break;

	case SO_OOBINLINE:
		v.val = sock_flag(sk, SOCK_URGINLINE);
		break;

	case SO_NO_CHECK:
		v.val = sk->sk_no_check;
		break;

	case SO_PRIORITY:
		v.val = sk->sk_priority;
		break;

	case SO_LINGER:
		lv		= sizeof(v.ling);
		v.ling.l_onoff	= sock_flag(sk, SOCK_LINGER);
		v.ling.l_linger	= sk->sk_lingertime / HZ;
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("getsockopt");
		break;

	case SO_TIMESTAMP:
		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
				!sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPNS:
		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPING:
		v.val = 0;
		if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
			v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
			v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
			v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
			v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
		break;

	case SO_RCVTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_SNDTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_RCVLOWAT:
		v.val = sk->sk_rcvlowat;
		break;

	case SO_SNDLOWAT:
		v.val = 1;
		break;

	case SO_PASSCRED:
		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_PEERCRED:
	{
		struct ucred peercred;
		if (len > sizeof(peercred))
			len = sizeof(peercred);
		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
		if (copy_to_user(optval, &peercred, len))
			return -EFAULT;
		goto lenout;
	}

	case SO_PEERNAME:
	{
		char address[128];

		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
			return -ENOTCONN;
		if (lv < len)
			return -EINVAL;
		if (copy_to_user(optval, address, len))
			return -EFAULT;
		goto lenout;
	}

	/* Dubious BSD thing... Probably nobody even uses it, but
	 * the UNIX standard wants it for whatever reason... -DaveM
	 */
	case SO_ACCEPTCONN:
		v.val = sk->sk_state == TCP_LISTEN;
		break;

	case SO_PASSSEC:
		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
		break;

	case SO_PEERSEC:
		return security_socket_getpeersec_stream(sock, optval, optlen, len);

	case SO_MARK:
		v.val = sk->sk_mark;
		break;

	case SO_RXQ_OVFL:
		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
		break;

	case SO_WIFI_STATUS:
		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
		break;

	case SO_PEEK_OFF:
		if (!sock->ops->set_peek_off)
			return -EOPNOTSUPP;

		v.val = sk->sk_peek_off;
		break;
	case SO_NOFCS:
		v.val = sock_flag(sk, SOCK_NOFCS);
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}

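/*
 * Illustrative userspace sketch (not part of this file): reading the
 * peer credentials that cred_to_ucred() above translates into the
 * caller's user namespace (works on connected AF_UNIX sockets).
 */
#if 0
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/socket.h>

static void example_print_peercred(int unix_fd)
{
	struct ucred peer;
	socklen_t len = sizeof(peer);

	if (!getsockopt(unix_fd, SOL_SOCKET, SO_PEERCRED, &peer, &len))
		printf("pid=%d uid=%d gid=%d\n", peer.pid, peer.uid, peer.gid);
}
#endif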

/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
	sock_lock_init_class_and_name(sk,
			af_family_slock_key_strings[sk->sk_family],
			af_family_slock_keys + sk->sk_family,
			af_family_key_strings[sk->sk_family],
			af_family_keys + sk->sk_family);
}

/*
 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
 * even temporarily, because of RCU lookups. sk_node should also be left as is.
 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
 */
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
#ifdef CONFIG_SECURITY_NETWORK
	void *sptr = nsk->sk_security;
#endif
	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));

	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));

#ifdef CONFIG_SECURITY_NETWORK
	nsk->sk_security = sptr;
	security_sk_clone(osk, nsk);
#endif
}

/*
 * caches using SLAB_DESTROY_BY_RCU should leave the .next pointer of nulls
 * nodes unmodified. Special care is taken when initializing the object to zero.
 */
static inline void sk_prot_clear_nulls(struct sock *sk, int size)
{
	if (offsetof(struct sock, sk_node.next) != 0)
		memset(sk, 0, offsetof(struct sock, sk_node.next));
	memset(&sk->sk_node.pprev, 0,
	       size - offsetof(struct sock, sk_node.pprev));
}

void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
{
	unsigned long nulls1, nulls2;

	nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
	nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
	if (nulls1 > nulls2)
		swap(nulls1, nulls2);

	if (nulls1 != 0)
		memset((char *)sk, 0, nulls1);
	memset((char *)sk + nulls1 + sizeof(void *), 0,
	       nulls2 - nulls1 - sizeof(void *));
	memset((char *)sk + nulls2 + sizeof(void *), 0,
	       size - nulls2 - sizeof(void *));
}
EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);

static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
		int family)
{
	struct sock *sk;
	struct kmem_cache *slab;

	slab = prot->slab;
	if (slab != NULL) {
		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
		if (!sk)
			return sk;
		if (priority & __GFP_ZERO) {
			if (prot->clear_sk)
				prot->clear_sk(sk, prot->obj_size);
			else
				sk_prot_clear_nulls(sk, prot->obj_size);
		}
	} else
		sk = kmalloc(prot->obj_size, priority);

	if (sk != NULL) {
		kmemcheck_annotate_bitfield(sk, flags);

		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free_sec;
		sk_tx_queue_clear(sk);
	}

	return sk;

out_free_sec:
	security_sk_free(sk);
out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}

static void sk_prot_free(struct proto *prot, struct sock *sk)
{
	struct kmem_cache *slab;
	struct module *owner;

	owner = prot->owner;
	slab = prot->slab;

	security_sk_free(sk);
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	module_put(owner);
}
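
/*
 * Illustrative sketch (not part of this file): the clear_sk variants
 * above zero a lookup object in pieces so that an RCU reader that lost
 * the race still sees a sane ->next. The struct below is hypothetical
 * and only demonstrates the offsetof()-bracketed memset pattern.
 */
#if 0
struct example_obj {
	struct hlist_nulls_node node;	/* node.next must survive re-init */
	int payload;
};

static void example_clear(struct example_obj *obj)
{
	/* zero everything below node.next (a no-op when node is first) ... */
	memset(obj, 0, offsetof(struct example_obj, node.next));
	/* ... and everything from node.pprev up, leaving node.next intact */
	memset(&obj->node.pprev, 0,
	       sizeof(*obj) - offsetof(struct example_obj, node.pprev));
}
#endif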

#ifdef CONFIG_CGROUPS
#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
void sock_update_classid(struct sock *sk)
{
	u32 classid;

	rcu_read_lock();  /* doing current task, which cannot vanish. */
	classid = task_cls_classid(current);
	rcu_read_unlock();
	if (classid && classid != sk->sk_classid)
		sk->sk_classid = classid;
}
EXPORT_SYMBOL(sock_update_classid);
#endif

#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
void sock_update_netprioidx(struct sock *sk, struct task_struct *task)
{
	if (in_interrupt())
		return;

	sk->sk_cgrp_prioidx = task_netprioidx(task);
}
EXPORT_SYMBOL_GPL(sock_update_netprioidx);
#endif
#endif

/**
 *	sk_alloc - All socket objects are allocated here
 *	@net: the applicable net namespace
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 */
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
		      struct proto *prot)
{
	struct sock *sk;

	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
	if (sk) {
		sk->sk_family = family;
		/*
		 * See comment in struct sock definition to understand
		 * why we need sk_prot_creator -acme
		 */
		sk->sk_prot = sk->sk_prot_creator = prot;
		sock_lock_init(sk);
		sock_net_set(sk, get_net(net));
		atomic_set(&sk->sk_wmem_alloc, 1);

		sock_update_classid(sk);
		sock_update_netprioidx(sk, current);
	}

	return sk;
}
EXPORT_SYMBOL(sk_alloc);

static void __sk_free(struct sock *sk)
{
	struct sk_filter *filter;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = rcu_dereference_check(sk->sk_filter,
				       atomic_read(&sk->sk_wmem_alloc) == 0);
	if (filter) {
		sk_filter_uncharge(sk, filter);
		RCU_INIT_POINTER(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);

	if (atomic_read(&sk->sk_omem_alloc))
		pr_debug("%s: optmem leakage (%d bytes) detected\n",
			 __func__, atomic_read(&sk->sk_omem_alloc));

	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	put_pid(sk->sk_peer_pid);
	put_net(sock_net(sk));
	sk_prot_free(sk->sk_prot_creator, sk);
}

void sk_free(struct sock *sk)
{
	/*
	 * We subtract one from sk_wmem_alloc and can know if
	 * some packets are still in some tx queue.
	 * If not zero, sock_wfree() will call __sk_free(sk) later
	 */
	if (atomic_dec_and_test(&sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sk_free);
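
/*
 * Illustrative sketch (not part of this file): a protocol typically
 * creates its sock with sk_alloc() and drops the initial sk_wmem_alloc
 * reference with sk_free(); "example_proto" is a hypothetical
 * struct proto.
 */
#if 0
static struct sock *example_create(struct net *net)
{
	struct sock *sk;

	sk = sk_alloc(net, PF_INET, GFP_KERNEL, &example_proto);
	if (!sk)
		return NULL;
	sock_init_data(NULL, sk);	/* queues, timer, default callbacks */
	return sk;
}

static void example_destroy(struct sock *sk)
{
	/* __sk_free() runs once in-flight packets release sk_wmem_alloc */
	sk_free(sk);
}
#endif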

/*
 * Last sock_put should drop a reference to sk->sk_net. It has already
 * been dropped in sk_change_net. Taking a reference to a stopping
 * namespace is not an option.
 * Take a reference to a socket to remove it from the hash _alive_ and
 * after that destroy it in the context of init_net.
 */
void sk_release_kernel(struct sock *sk)
{
	if (sk == NULL || sk->sk_socket == NULL)
		return;

	sock_hold(sk);
	sock_release(sk->sk_socket);
	release_net(sock_net(sk));
	sock_net_set(sk, get_net(&init_net));
	sock_put(sk);
}
EXPORT_SYMBOL(sk_release_kernel);

static void sk_update_clone(const struct sock *sk, struct sock *newsk)
{
	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
		sock_update_memcg(newsk);
}

/**
 *	sk_clone_lock - clone a socket, and lock its clone
 *	@sk: the socket to clone
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
	struct sock *newsk;

	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
	if (newsk != NULL) {
		struct sk_filter *filter;

		sock_copy(newsk, sk);

		/* SANITY */
		get_net(sock_net(newsk));
		sk_node_init(&newsk->sk_node);
		sock_lock_init(newsk);
		bh_lock_sock(newsk);
		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
		newsk->sk_backlog.len = 0;

		atomic_set(&newsk->sk_rmem_alloc, 0);
		/*
		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
		 */
		atomic_set(&newsk->sk_wmem_alloc, 1);
		atomic_set(&newsk->sk_omem_alloc, 0);
		skb_queue_head_init(&newsk->sk_receive_queue);
		skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
		skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

		spin_lock_init(&newsk->sk_dst_lock);
		rwlock_init(&newsk->sk_callback_lock);
		lockdep_set_class_and_name(&newsk->sk_callback_lock,
					   af_callback_keys + newsk->sk_family,
					   af_family_clock_key_strings[newsk->sk_family]);

		newsk->sk_dst_cache	= NULL;
		newsk->sk_wmem_queued	= 0;
		newsk->sk_forward_alloc = 0;
		newsk->sk_send_head	= NULL;
		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

		sock_reset_flag(newsk, SOCK_DONE);
		skb_queue_head_init(&newsk->sk_error_queue);

		filter = rcu_dereference_protected(newsk->sk_filter, 1);
		if (filter != NULL)
			sk_filter_charge(newsk, filter);

		if (unlikely(xfrm_sk_clone_policy(newsk))) {
			/* It is still a raw copy of the parent, so invalidate
			 * the destructor and do a plain sk_free() */
			newsk->sk_destruct = NULL;
			bh_unlock_sock(newsk);
			sk_free(newsk);
			newsk = NULL;
			goto out;
		}

		newsk->sk_err	   = 0;
		newsk->sk_priority = 0;
		/*
		 * Before updating sk_refcnt, we must commit prior changes to memory
		 * (Documentation/RCU/rculist_nulls.txt for details)
		 */
		smp_wmb();
		atomic_set(&newsk->sk_refcnt, 2);

		/*
		 * Increment the counter in the same struct proto as the master
		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
		 * is the same as sk->sk_prot->socks, as this field was copied
		 * with memcpy).
		 *
		 * This _changes_ the previous behaviour, where
		 * tcp_create_openreq_child always was incrementing the
		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
		 * to be taken into account in all callers. -acme
		 */
		sk_refcnt_debug_inc(newsk);
		sk_set_socket(newsk, NULL);
		newsk->sk_wq = NULL;

		sk_update_clone(sk, newsk);

		if (newsk->sk_prot->sockets_allocated)
			sk_sockets_allocated_inc(newsk);

		if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
			net_enable_timestamp();
	}
out:
	return newsk;
}
EXPORT_SYMBOL_GPL(sk_clone_lock);

void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	__sk_dst_set(sk, dst);
	sk->sk_route_caps = dst->dev->features;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	sk->sk_route_caps &= ~sk->sk_route_nocaps;
	if (sk_can_gso(sk)) {
		if (dst->header_len) {
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		} else {
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
			sk->sk_gso_max_size = dst->dev->gso_max_size;
			sk->sk_gso_max_segs = dst->dev->gso_max_segs;
		}
	}
}
EXPORT_SYMBOL_GPL(sk_setup_caps);

void __init sk_init(void)
{
	if (totalram_pages <= 4096) {
		sysctl_wmem_max = 32767;
		sysctl_rmem_max = 32767;
		sysctl_wmem_default = 32767;
		sysctl_rmem_default = 32767;
	} else if (totalram_pages >= 131072) {
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}

/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/*
		 * Keep a reference on sk_wmem_alloc, this will be released
		 * after sk_write_space() call
		 */
		atomic_sub(len - 1, &sk->sk_wmem_alloc);
		sk->sk_write_space(sk);
		len = 1;
	}
	/*
	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
	 * could not do because of in-flight packets
	 */
	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	atomic_sub(len, &sk->sk_rmem_alloc);
	sk_mem_uncharge(sk, len);
}
EXPORT_SYMBOL(sock_rfree);

void sock_edemux(struct sk_buff *skb)
{
	sock_put(skb->sk);
}
EXPORT_SYMBOL(sock_edemux);

int sock_i_uid(struct sock *sk)
{
	int uid;

	read_lock_bh(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
	read_unlock_bh(&sk->sk_callback_lock);
	return uid;
}
EXPORT_SYMBOL(sock_i_uid);

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock_bh(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock_bh(&sk->sk_callback_lock);
	return ino;
}
EXPORT_SYMBOL(sock_i_ino);

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}
EXPORT_SYMBOL(sock_wmalloc);

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	if ((unsigned int)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}
EXPORT_SYMBOL(sock_kmalloc);

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
EXPORT_SYMBOL(sock_kfree_s);
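
/*
 * Illustrative sketch (not part of this file): option memory is charged
 * against sk_omem_alloc and bounded by sysctl_optmem_max, so the free
 * must pass the same size as the allocation.
 */
#if 0
static int example_store_option(struct sock *sk, const void __user *src, int len)
{
	void *buf = sock_kmalloc(sk, len, GFP_KERNEL);

	if (!buf)
		return -ENOBUFS;
	if (copy_from_user(buf, src, len)) {
		sock_kfree_s(sk, buf, len);	/* uncharge the same size */
		return -EFAULT;
	}
	/* hand buf off to protocol private state here */
	return 0;
}
#endif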

/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk_sleep(sk), &wait);
	return timeo;
}


/*
 *	Generic send/receive buffer handlers
 */

struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
				     unsigned long data_len, int noblock,
				     int *errcode)
{
	struct sk_buff *skb;
	gfp_t gfp_mask;
	long timeo;
	int err;
	int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;

	err = -EMSGSIZE;
	if (npages > MAX_SKB_FRAGS)
		goto failure;

	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
			skb = alloc_skb(header_len, gfp_mask);
			if (skb) {
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;

					page = alloc_pages(sk->sk_allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					__skb_fill_page_desc(skb, i,
							page, 0,
							(data_len >= PAGE_SIZE ?
							 PAGE_SIZE :
							 data_len));
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
EXPORT_SYMBOL(sock_alloc_send_pskb);

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
EXPORT_SYMBOL(sock_alloc_send_skb);

static void __lock_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			prefetch(next);
			WARN_ON_ONCE(skb_dst_is_noref(skb));
			skb->next = NULL;
			sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);

	/*
	 * Doing the zeroing here guarantees we cannot loop forever
	 * while a wild producer attempts to flood us.
	 */
	sk->sk_backlog.len = 0;
}
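
/*
 * Illustrative sketch (not part of this file): a datagram sendmsg()
 * path typically obtains its buffer through sock_alloc_send_skb(),
 * which sleeps (subject to SO_SNDTIMEO) until write memory is
 * available. EXAMPLE_HEADER_LEN is hypothetical; header sizing is
 * protocol specific.
 */
#if 0
static int example_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	struct sk_buff *skb;
	int err;

	skb = sock_alloc_send_skb(sk, len + EXAMPLE_HEADER_LEN,
				  msg->msg_flags & MSG_DONTWAIT, &err);
	if (!skb)
		return err;
	skb_reserve(skb, EXAMPLE_HEADER_LEN);
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err) {
		kfree_skb(skb);
		return err;
	}
	/* queue skb for transmission here */
	return len;
}
#endif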

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk_sleep(sk), &wait);
	return rc;
}
EXPORT_SYMBOL(sk_wait_data);
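
/*
 * Illustrative sketch (not part of this file): a simplified blocking
 * receive loop built on sk_wait_data(). The socket lock must be held;
 * sk_wait_data() drops it while sleeping and re-takes it before
 * returning. Real code would also recheck sk->sk_err and shutdown.
 */
#if 0
static struct sk_buff *example_wait_for_packet(struct sock *sk, int noblock,
					       int *err)
{
	long timeo = sock_rcvtimeo(sk, noblock);
	struct sk_buff *skb;

	while (!(skb = skb_dequeue(&sk->sk_receive_queue))) {
		if (!timeo) {
			*err = -EAGAIN;
			return NULL;
		}
		if (signal_pending(current)) {
			*err = sock_intr_errno(timeo);
			return NULL;
		}
		sk_wait_data(sk, &timeo);
	}
	return skb;
}
#endif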

/**
 *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
 *	@sk: socket
 *	@size: memory size to allocate
 *	@kind: allocation type
 *
 *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
 *	rmem allocation. This function assumes that protocols which have
 *	memory_pressure use sk_wmem_queued as write buffer accounting.
 */
int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
	struct proto *prot = sk->sk_prot;
	int amt = sk_mem_pages(size);
	long allocated;
	int parent_status = UNDER_LIMIT;

	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;

	allocated = sk_memory_allocated_add(sk, amt, &parent_status);

	/* Under limit. */
	if (parent_status == UNDER_LIMIT &&
			allocated <= sk_prot_mem_limits(sk, 0)) {
		sk_leave_memory_pressure(sk);
		return 1;
	}

	/* Under pressure. (we or our parents) */
	if ((parent_status > SOFT_LIMIT) ||
			allocated > sk_prot_mem_limits(sk, 1))
		sk_enter_memory_pressure(sk);

	/* Over hard limit (we or our parents) */
	if ((parent_status == OVER_LIMIT) ||
			(allocated > sk_prot_mem_limits(sk, 2)))
		goto suppress_allocation;

	/* guarantee minimum buffer size under pressure */
	if (kind == SK_MEM_RECV) {
		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
			return 1;

	} else { /* SK_MEM_SEND */
		if (sk->sk_type == SOCK_STREAM) {
			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
				return 1;
		} else if (atomic_read(&sk->sk_wmem_alloc) <
			   prot->sysctl_wmem[0])
				return 1;
	}

	if (sk_has_memory_pressure(sk)) {
		int alloc;

		if (!sk_under_memory_pressure(sk))
			return 1;
		alloc = sk_sockets_allocated_read_positive(sk);
		if (sk_prot_mem_limits(sk, 2) > alloc *
		    sk_mem_pages(sk->sk_wmem_queued +
				 atomic_read(&sk->sk_rmem_alloc) +
				 sk->sk_forward_alloc))
			return 1;
	}

suppress_allocation:

	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so that we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
			return 1;
	}

	trace_sock_exceed_buf_limit(sk, prot, allocated);

	/* Alas. Undo changes. */
	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;

	sk_memory_allocated_sub(sk, amt);

	return 0;
}
EXPORT_SYMBOL(__sk_mem_schedule);

/**
 *	__sk_mem_reclaim - reclaim memory_allocated
 *	@sk: socket
 */
void __sk_mem_reclaim(struct sock *sk)
{
	sk_memory_allocated_sub(sk,
				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;

	if (sk_under_memory_pressure(sk) &&
	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
		sk_leave_memory_pressure(sk);
}
EXPORT_SYMBOL(__sk_mem_reclaim);


/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_bind);

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_connect);

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_socketpair);

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_accept);

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getname);

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}
EXPORT_SYMBOL(sock_no_poll);

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_ioctl);

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_listen);

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_shutdown);

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, unsigned int optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_setsockopt);

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getsockopt);

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg);

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_recvmsg);

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
EXPORT_SYMBOL(sock_no_mmap);

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = {.msg_flags = flags};
	struct kvec iov;
	char *kaddr = kmap(page);
	iov.iov_base = kaddr + offset;
/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static void sock_def_error_report(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_poll(&wq->wait, POLLERR);
	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
	rcu_read_unlock();
}

static void sock_def_readable(struct sock *sk, int len)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
						POLLRDNORM | POLLRDBAND);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	rcu_read_unlock();
}

static void sock_def_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}

	rcu_read_unlock();
}

static void sock_def_destruct(struct sock *sk)
{
	kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
EXPORT_SYMBOL(sk_send_sigurg);

void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}
EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (timer_pending(timer) && del_timer(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer);

void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);
#ifdef CONFIG_NET_DMA
	skb_queue_head_init(&sk->sk_async_wait_queue);
#endif

	sk->sk_send_head = NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation = GFP_KERNEL;
	sk->sk_rcvbuf = sysctl_rmem_default;
	sk->sk_sndbuf = sysctl_wmem_default;
	sk->sk_state = TCP_CLOSE;
	sk_set_socket(sk, sock);

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type = sock->type;
		sk->sk_wq = sock->wq;
		sock->sk = sk;
	} else
		sk->sk_wq = NULL;

	spin_lock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);
	lockdep_set_class_and_name(&sk->sk_callback_lock,
				   af_callback_keys + sk->sk_family,
				   af_family_clock_key_strings[sk->sk_family]);

	sk->sk_state_change = sock_def_wakeup;
	sk->sk_data_ready = sock_def_readable;
	sk->sk_write_space = sock_def_write_space;
	sk->sk_error_report = sock_def_error_report;
	sk->sk_destruct = sock_def_destruct;

	sk->sk_sndmsg_page = NULL;
	sk->sk_sndmsg_off = 0;
	sk->sk_peek_off = -1;

	sk->sk_peer_pid = NULL;
	sk->sk_peer_cred = NULL;
	sk->sk_write_pending = 0;
	sk->sk_rcvlowat = 1;
	sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp = ktime_set(-1L, 0);

	/*
	 * Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.txt for details)
	 */
	smp_wmb();
	atomic_set(&sk->sk_refcnt, 1);
	atomic_set(&sk->sk_drops, 0);
}
EXPORT_SYMBOL(sock_init_data);
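/*
 * Illustrative sketch, not part of this file: an address family's
 * ->create() typically calls sock_init_data() and then overrides only
 * the callbacks it cares about; hypothetical names below:
 *
 *	sock_init_data(sock, sk);
 *	sk->sk_data_ready = my_proto_data_ready;   // hypothetical callback
 *	sk->sk_destruct   = my_proto_destruct;     // hypothetical destructor
 */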
void lock_sock_nested(struct sock *sk, int subclass)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owned)
		__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
	local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);

void release_sock(struct sock *sk)
{
	/*
	 * The sk_lock has mutex_unlock() semantics:
	 */
	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);

	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);

	if (sk->sk_prot->release_cb)
		sk->sk_prot->release_cb(sk);

	sk->sk_lock.owned = 0;
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);

/**
 * lock_sock_fast - fast version of lock_sock
 * @sk: socket
 *
 * This version should be used for very small sections, where the process
 * won't block.
 *
 * Returns false if the fast path was taken:
 *   sk_lock.slock locked, owned = 0, BH disabled
 * Returns true if the slow path was taken:
 *   sk_lock.slock unlocked, owned = 1, BH enabled
 */
bool lock_sock_fast(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);

	if (!sk->sk_lock.owned)
		/*
		 * Note: we must keep BH disabled on this fast path
		 */
		return false;

	__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
	local_bh_enable();
	return true;
}
EXPORT_SYMBOL(lock_sock_fast);
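/*
 * Illustrative pairing, not part of this file: callers must feed the
 * return value back to unlock_sock_fast() from <net/sock.h> so the
 * matching unlock path runs:
 *
 *	bool slow = lock_sock_fast(sk);
 *	// ... short, non-blocking critical section ...
 *	unlock_sock_fast(sk, slow);
 */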
int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	struct timeval tv;
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	tv = ktime_to_timeval(sk->sk_stamp);
	if (tv.tv_sec == -1)
		return -ENOENT;
	if (tv.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		tv = ktime_to_timeval(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
	struct timespec ts;
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	ts = ktime_to_timespec(sk->sk_stamp);
	if (ts.tv_sec == -1)
		return -ENOENT;
	if (ts.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		ts = ktime_to_timespec(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestampns);

void sock_enable_timestamp(struct sock *sk, int flag)
{
	if (!sock_flag(sk, flag)) {
		unsigned long previous_flags = sk->sk_flags;

		sock_set_flag(sk, flag);
		/*
		 * we just set one of the two flags which require net
		 * time stamping, but time stamping might have been on
		 * already because of the other one
		 */
		if (!(previous_flags & SK_FLAGS_TIMESTAMP))
			net_enable_timestamp();
	}
}

/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_getsockopt != NULL)
		return sk->sk_prot->compat_getsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}
EXPORT_SYMBOL(sock_common_recvmsg);
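/*
 * Illustrative sketch, not part of this file: families whose struct proto
 * implements getsockopt/recvmsg can route the socket-layer ops straight
 * through these helpers; hypothetical wiring:
 *
 *	.getsockopt = sock_common_getsockopt,
 *	.recvmsg    = sock_common_recvmsg,
 */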
/*
 *	Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_setsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_setsockopt != NULL)
		return sk->sk_prot->compat_setsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif

void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket, but the network stack still does.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are in flight because some CPU runs
	 * the receiver and did a hash table lookup before we unhashed the
	 * socket. They will reach the receive queue and be purged by the
	 * socket destructor.
	 *
	 * Also we still have packets pending on the receive queue and,
	 * probably, our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);
	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
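/*
 * Illustrative sketch, not part of this file: simple protocols often end
 * their ->close() handler here once protocol-private teardown is done;
 * the wrapper below is hypothetical:
 *
 *	static void example_close(struct sock *sk, long timeout)
 *	{
 *		sk_common_release(sk);
 *	}
 */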
#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR	64	/* should be enough for the first time */
struct prot_inuse {
	int val[PROTO_INUSE_NR];
};

static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);

#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);

static int __net_init sock_inuse_init_net(struct net *net)
{
	net->core.inuse = alloc_percpu(struct prot_inuse);
	return net->core.inuse ? 0 : -ENOMEM;
}

static void __net_exit sock_inuse_exit_net(struct net *net)
{
	free_percpu(net->core.inuse);
}

static struct pernet_operations net_inuse_ops = {
	.init = sock_inuse_init_net,
	.exit = sock_inuse_exit_net,
};

static __init int net_inuse_init(void)
{
	if (register_pernet_subsys(&net_inuse_ops))
		panic("Cannot initialize net inuse counters");

	return 0;
}

core_initcall(net_inuse_init);
#else
static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);

void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu(prot_inuse, cpu).val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
#endif

static void assign_proto_idx(struct proto *prot)
{
	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);

	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
		pr_err("PROTO_INUSE_NR exhausted\n");
		return;
	}

	set_bit(prot->inuse_idx, proto_inuse_idx);
}

static void release_proto_idx(struct proto *prot)
{
	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
		clear_bit(prot->inuse_idx, proto_inuse_idx);
}
#else
static inline void assign_proto_idx(struct proto *prot)
{
}

static inline void release_proto_idx(struct proto *prot)
{
}
#endif

int proto_register(struct proto *prot, int alloc_slab)
{
	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | prot->slab_flags,
					NULL);

		if (prot->slab == NULL) {
			pr_crit("%s: Can't create sock SLAB cache!\n",
				prot->name);
			goto out;
		}

		if (prot->rsk_prot != NULL) {
			prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
			if (prot->rsk_prot->slab_name == NULL)
				goto out_free_sock_slab;

			prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
								 prot->rsk_prot->obj_size, 0,
								 SLAB_HWCACHE_ALIGN, NULL);

			if (prot->rsk_prot->slab == NULL) {
				pr_crit("%s: Can't create request sock SLAB cache!\n",
					prot->name);
				goto out_free_request_sock_slab_name;
			}
		}

		if (prot->twsk_prot != NULL) {
			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);

			if (prot->twsk_prot->twsk_slab_name == NULL)
				goto out_free_request_sock_slab;

			prot->twsk_prot->twsk_slab =
				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
						  prot->twsk_prot->twsk_obj_size,
						  0,
						  SLAB_HWCACHE_ALIGN |
							prot->slab_flags,
						  NULL);
			if (prot->twsk_prot->twsk_slab == NULL)
				goto out_free_timewait_sock_slab_name;
		}
	}

	mutex_lock(&proto_list_mutex);
	list_add(&prot->node, &proto_list);
	assign_proto_idx(prot);
	mutex_unlock(&proto_list_mutex);
	return 0;

out_free_timewait_sock_slab_name:
	kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
	if (prot->rsk_prot && prot->rsk_prot->slab) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		prot->rsk_prot->slab = NULL;
	}
out_free_request_sock_slab_name:
	if (prot->rsk_prot)
		kfree(prot->rsk_prot->slab_name);
out_free_sock_slab:
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
out:
	return -ENOBUFS;
}
EXPORT_SYMBOL(proto_register);
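/*
 * Illustrative usage, not part of this file: protocols register at init
 * time, usually with their own slab; TCP's init path, for instance, does
 * roughly
 *
 *	rc = proto_register(&tcp_prot, 1);
 *	if (rc)
 *		goto out;	// error label is the caller's own
 *
 * and tears the slabs down again with proto_unregister(&tcp_prot).
 */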
void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		kfree(prot->rsk_prot->slab_name);
		prot->rsk_prot->slab = NULL;
	}

	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
		kfree(prot->twsk_prot->twsk_slab_name);
		prot->twsk_prot->twsk_slab = NULL;
	}
}
EXPORT_SYMBOL(proto_unregister);

#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	return seq_list_start_head(&proto_list, *pos);
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

static long sock_prot_memory_allocated(struct proto *proto)
{
	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
}

static char *sock_prot_memory_pressure(struct proto *proto)
{
	return proto->memory_pressure != NULL ?
	       proto_memory_pressure(proto) ? "yes" : "no" : "NI";
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
"no" : "yes", 2654 module_name(proto->owner), 2655 proto_method_implemented(proto->close), 2656 proto_method_implemented(proto->connect), 2657 proto_method_implemented(proto->disconnect), 2658 proto_method_implemented(proto->accept), 2659 proto_method_implemented(proto->ioctl), 2660 proto_method_implemented(proto->init), 2661 proto_method_implemented(proto->destroy), 2662 proto_method_implemented(proto->shutdown), 2663 proto_method_implemented(proto->setsockopt), 2664 proto_method_implemented(proto->getsockopt), 2665 proto_method_implemented(proto->sendmsg), 2666 proto_method_implemented(proto->recvmsg), 2667 proto_method_implemented(proto->sendpage), 2668 proto_method_implemented(proto->bind), 2669 proto_method_implemented(proto->backlog_rcv), 2670 proto_method_implemented(proto->hash), 2671 proto_method_implemented(proto->unhash), 2672 proto_method_implemented(proto->get_port), 2673 proto_method_implemented(proto->enter_memory_pressure)); 2674} 2675 2676static int proto_seq_show(struct seq_file *seq, void *v) 2677{ 2678 if (v == &proto_list) 2679 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s", 2680 "protocol", 2681 "size", 2682 "sockets", 2683 "memory", 2684 "press", 2685 "maxhdr", 2686 "slab", 2687 "module", 2688 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"); 2689 else 2690 proto_seq_printf(seq, list_entry(v, struct proto, node)); 2691 return 0; 2692} 2693 2694static const struct seq_operations proto_seq_ops = { 2695 .start = proto_seq_start, 2696 .next = proto_seq_next, 2697 .stop = proto_seq_stop, 2698 .show = proto_seq_show, 2699}; 2700 2701static int proto_seq_open(struct inode *inode, struct file *file) 2702{ 2703 return seq_open_net(inode, file, &proto_seq_ops, 2704 sizeof(struct seq_net_private)); 2705} 2706 2707static const struct file_operations proto_seq_fops = { 2708 .owner = THIS_MODULE, 2709 .open = proto_seq_open, 2710 .read = seq_read, 2711 .llseek = seq_lseek, 2712 .release = seq_release_net, 2713}; 2714 2715static __net_init int proto_init_net(struct net *net) 2716{ 2717 if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops)) 2718 return -ENOMEM; 2719 2720 return 0; 2721} 2722 2723static __net_exit void proto_exit_net(struct net *net) 2724{ 2725 proc_net_remove(net, "protocols"); 2726} 2727 2728 2729static __net_initdata struct pernet_operations proto_net_ops = { 2730 .init = proto_init_net, 2731 .exit = proto_exit_net, 2732}; 2733 2734static int __init proto_init(void) 2735{ 2736 return register_pernet_subsys(&proto_net_ops); 2737} 2738 2739subsys_initcall(proto_init); 2740 2741#endif /* PROC_FS */ 2742